summaryrefslogtreecommitdiffstats
path: root/src/basic
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/basic/MurmurHash2.c90
-rw-r--r--src/basic/MurmurHash2.h30
-rw-r--r--src/basic/af-list.c40
-rw-r--r--src/basic/af-list.h25
-rw-r--r--src/basic/af-to-name.awk9
-rw-r--r--src/basic/alloc-util.c81
-rw-r--r--src/basic/alloc-util.h162
-rw-r--r--src/basic/architecture.c178
-rw-r--r--src/basic/architecture.h237
-rw-r--r--src/basic/arphrd-list.c41
-rw-r--r--src/basic/arphrd-list.h7
-rw-r--r--src/basic/arphrd-to-name.awk9
-rw-r--r--src/basic/async.c107
-rw-r--r--src/basic/async.h7
-rw-r--r--src/basic/audit-util.c89
-rw-r--r--src/basic/audit-util.h17
-rw-r--r--src/basic/blockdev-util.c181
-rw-r--r--src/basic/blockdev-util.h20
-rw-r--r--src/basic/btrfs-util.c1993
-rw-r--r--src/basic/btrfs-util.h120
-rw-r--r--src/basic/build.h156
-rw-r--r--src/basic/bus-label.c81
-rw-r--r--src/basic/bus-label.h15
-rw-r--r--src/basic/cap-list.c119
-rw-r--r--src/basic/cap-list.h9
-rw-r--r--src/basic/cap-to-name.awk9
-rw-r--r--src/basic/capability-util.c487
-rw-r--r--src/basic/capability-util.h70
-rw-r--r--src/basic/cgroup-util.c2933
-rw-r--r--src/basic/cgroup-util.h275
-rw-r--r--src/basic/chattr-util.c96
-rw-r--r--src/basic/chattr-util.h8
-rw-r--r--src/basic/conf-files.c325
-rw-r--r--src/basic/conf-files.h23
-rw-r--r--src/basic/copy.c936
-rw-r--r--src/basic/copy.h61
-rw-r--r--src/basic/def.h77
-rw-r--r--src/basic/device-nodes.c63
-rw-r--r--src/basic/device-nodes.h16
-rw-r--r--src/basic/dirent-util.c71
-rw-r--r--src/basic/dirent-util.h36
-rw-r--r--src/basic/env-file.c564
-rw-r--r--src/basic/env-file.h17
-rw-r--r--src/basic/env-util.c752
-rw-r--r--src/basic/env-util.h47
-rw-r--r--src/basic/errno-list.c37
-rw-r--r--src/basic/errno-list.h15
-rw-r--r--src/basic/errno-to-name.awk9
-rw-r--r--src/basic/escape.c506
-rw-r--r--src/basic/escape.h53
-rw-r--r--src/basic/ether-addr-util.c111
-rw-r--r--src/basic/ether-addr-util.h28
-rw-r--r--src/basic/extract-word.c285
-rw-r--r--src/basic/extract-word.h17
-rw-r--r--src/basic/fd-util.c967
-rw-r--r--src/basic/fd-util.h110
-rw-r--r--src/basic/fileio.c822
-rw-r--r--src/basic/fileio.h78
-rw-r--r--src/basic/format-util.h67
-rw-r--r--src/basic/fs-util.c1358
-rw-r--r--src/basic/fs-util.h111
-rw-r--r--src/basic/gcrypt-util.c52
-rw-r--r--src/basic/gcrypt-util.h34
-rwxr-xr-xsrc/basic/generate-af-list.sh6
-rwxr-xr-xsrc/basic/generate-arphrd-list.sh6
-rwxr-xr-xsrc/basic/generate-cap-list.sh6
-rwxr-xr-xsrc/basic/generate-errno-list.sh5
-rw-r--r--src/basic/glob-util.c73
-rw-r--r--src/basic/glob-util.h22
-rw-r--r--src/basic/gunicode.c110
-rw-r--r--src/basic/gunicode.h30
-rw-r--r--src/basic/hash-funcs.c91
-rw-r--r--src/basic/hash-funcs.h106
-rw-r--r--src/basic/hashmap.c1911
-rw-r--r--src/basic/hashmap.h429
-rw-r--r--src/basic/hexdecoct.c819
-rw-r--r--src/basic/hexdecoct.h38
-rw-r--r--src/basic/hostname-util.c308
-rw-r--r--src/basic/hostname-util.h28
-rw-r--r--src/basic/in-addr-util.c622
-rw-r--r--src/basic/in-addr-util.h70
-rw-r--r--src/basic/io-util.c264
-rw-r--r--src/basic/io-util.h75
-rw-r--r--src/basic/ioprio.h56
-rw-r--r--src/basic/khash.c322
-rw-r--r--src/basic/khash.h37
-rw-r--r--src/basic/label.c64
-rw-r--r--src/basic/label.h18
-rw-r--r--src/basic/list.h171
-rw-r--r--src/basic/locale-util.c460
-rw-r--r--src/basic/locale-util.h84
-rw-r--r--src/basic/log.c1324
-rw-r--r--src/basic/log.h324
-rw-r--r--src/basic/login-util.c14
-rw-r--r--src/basic/login-util.h11
-rw-r--r--src/basic/macro.h549
-rw-r--r--src/basic/memfd-util.c154
-rw-r--r--src/basic/memfd-util.h18
-rw-r--r--src/basic/mempool.c99
-rw-r--r--src/basic/mempool.h31
-rw-r--r--src/basic/meson.build310
-rw-r--r--src/basic/missing.h25
-rw-r--r--src/basic/missing_audit.h24
-rw-r--r--src/basic/missing_btrfs.h22
-rw-r--r--src/basic/missing_btrfs_tree.h109
-rw-r--r--src/basic/missing_capability.h12
-rw-r--r--src/basic/missing_drm.h10
-rw-r--r--src/basic/missing_ethtool.h131
-rw-r--r--src/basic/missing_fcntl.h60
-rw-r--r--src/basic/missing_fib_rules.h45
-rw-r--r--src/basic/missing_fou.h55
-rw-r--r--src/basic/missing_fs.h63
-rw-r--r--src/basic/missing_if_bridge.h21
-rw-r--r--src/basic/missing_if_link.h393
-rw-r--r--src/basic/missing_if_tunnel.h59
-rw-r--r--src/basic/missing_input.h45
-rw-r--r--src/basic/missing_keyctl.h78
-rw-r--r--src/basic/missing_magic.h34
-rw-r--r--src/basic/missing_mman.h12
-rw-r--r--src/basic/missing_network.h155
-rw-r--r--src/basic/missing_prctl.h14
-rw-r--r--src/basic/missing_random.h16
-rw-r--r--src/basic/missing_resource.h11
-rw-r--r--src/basic/missing_sched.h21
-rw-r--r--src/basic/missing_securebits.h17
-rw-r--r--src/basic/missing_socket.h60
-rw-r--r--src/basic/missing_stat.h51
-rw-r--r--src/basic/missing_stdlib.h13
-rw-r--r--src/basic/missing_syscall.h446
-rw-r--r--src/basic/missing_timerfd.h8
-rw-r--r--src/basic/missing_type.h12
-rw-r--r--src/basic/missing_vxcan.h12
-rw-r--r--src/basic/mkdir-label.c58
-rw-r--r--src/basic/mkdir.c166
-rw-r--r--src/basic/mkdir.h26
-rw-r--r--src/basic/mountpoint-util.c444
-rw-r--r--src/basic/mountpoint-util.h24
-rw-r--r--src/basic/nss-util.h183
-rw-r--r--src/basic/ordered-set.c47
-rw-r--r--src/basic/ordered-set.h62
-rw-r--r--src/basic/parse-util.c779
-rw-r--r--src/basic/parse-util.h120
-rw-r--r--src/basic/path-util.c1150
-rw-r--r--src/basic/path-util.h190
-rw-r--r--src/basic/prioq.c300
-rw-r--r--src/basic/prioq.h32
-rw-r--r--src/basic/proc-cmdline.c363
-rw-r--r--src/basic/proc-cmdline.h41
-rw-r--r--src/basic/process-util.c1565
-rw-r--r--src/basic/process-util.h194
-rw-r--r--src/basic/procfs-util.c268
-rw-r--r--src/basic/procfs-util.h17
-rw-r--r--src/basic/random-util.c265
-rw-r--r--src/basic/random-util.h33
-rw-r--r--src/basic/ratelimit.c38
-rw-r--r--src/basic/ratelimit.h40
-rw-r--r--src/basic/raw-clone.h79
-rw-r--r--src/basic/raw-reboot.h14
-rw-r--r--src/basic/refcnt.h54
-rw-r--r--src/basic/replace-var.c94
-rw-r--r--src/basic/replace-var.h4
-rw-r--r--src/basic/rlimit-util.c410
-rw-r--r--src/basic/rlimit-util.h25
-rw-r--r--src/basic/rm-rf.c220
-rw-r--r--src/basic/rm-rf.h32
-rw-r--r--src/basic/selinux-util.c518
-rw-r--r--src/basic/selinux-util.h34
-rw-r--r--src/basic/set.h130
-rw-r--r--src/basic/sigbus.c139
-rw-r--r--src/basic/sigbus.h7
-rw-r--r--src/basic/signal-util.c290
-rw-r--r--src/basic/signal-util.h43
-rw-r--r--src/basic/siphash24.c200
-rw-r--r--src/basic/siphash24.h28
-rw-r--r--src/basic/smack-util.c289
-rw-r--r--src/basic/smack-util.h42
-rw-r--r--src/basic/socket-label.c163
-rw-r--r--src/basic/socket-util.c1347
-rw-r--r--src/basic/socket-util.h200
-rw-r--r--src/basic/sparse-endian.h90
-rw-r--r--src/basic/special.h105
-rw-r--r--src/basic/stat-util.c427
-rw-r--r--src/basic/stat-util.h90
-rw-r--r--src/basic/static-destruct.h56
-rw-r--r--src/basic/stdio-util.h64
-rw-r--r--src/basic/strbuf.c183
-rw-r--r--src/basic/strbuf.h39
-rw-r--r--src/basic/string-table.c17
-rw-r--r--src/basic/string-table.h112
-rw-r--r--src/basic/string-util.c1099
-rw-r--r--src/basic/string-util.h266
-rw-r--r--src/basic/strv.c889
-rw-r--r--src/basic/strv.h190
-rw-r--r--src/basic/strxcpyx.c105
-rw-r--r--src/basic/strxcpyx.h12
-rw-r--r--src/basic/syslog-util.c99
-rw-r--r--src/basic/syslog-util.h14
-rw-r--r--src/basic/terminal-util.c1318
-rw-r--r--src/basic/terminal-util.h160
-rw-r--r--src/basic/time-util.c1471
-rw-r--r--src/basic/time-util.h179
-rw-r--r--src/basic/tmpfile-util.c330
-rw-r--r--src/basic/tmpfile-util.h19
-rw-r--r--src/basic/umask-util.h28
-rw-r--r--src/basic/unaligned.h99
-rw-r--r--src/basic/unit-def.c273
-rw-r--r--src/basic/unit-def.h284
-rw-r--r--src/basic/unit-name.c775
-rw-r--r--src/basic/unit-name.h65
-rw-r--r--src/basic/user-util.c857
-rw-r--r--src/basic/user-util.h115
-rw-r--r--src/basic/utf8.c532
-rw-r--r--src/basic/utf8.h51
-rw-r--r--src/basic/util.c637
-rw-r--r--src/basic/util.h253
-rw-r--r--src/basic/virt.c642
-rw-r--r--src/basic/virt.h57
-rw-r--r--src/basic/xattr-util.c217
-rw-r--r--src/basic/xattr-util.h25
219 files changed, 49459 insertions, 0 deletions
diff --git a/src/basic/MurmurHash2.c b/src/basic/MurmurHash2.c
new file mode 100644
index 0000000..5859af0
--- /dev/null
+++ b/src/basic/MurmurHash2.c
@@ -0,0 +1,90 @@
+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - This code makes a few assumptions about how your machine behaves -
+
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+// machines.
+
+#include "MurmurHash2.h"
+
+#include <string.h>
+
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+// Computes the 32-bit MurmurHash2 of the 'len' bytes starting at 'key', mixed
+// with 'seed'. Input words are consumed in host byte order, so the result
+// differs between little-endian and big-endian machines. If 'len' is zero or
+// negative no input byte is read.
+
+uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
+{
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
+
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  // Initialize the hash to a 'random' value
+
+  uint32_t h = seed ^ len;
+
+  // Mix 4 bytes at a time into the hash
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  while (len >= 4)
+  {
+    uint32_t k;
+
+    // Fetch the word with memcpy() instead of dereferencing a casted pointer:
+    // 'data' need not be aligned for uint32_t, and the cast broke the strict
+    // aliasing rules. The value obtained is the same host-order word.
+    memcpy(&k, data, sizeof(k));
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+
+  // Handle the last few bytes of the input array
+
+  switch(len)
+  {
+  case 3: h ^= data[2] << 16; /* fall through */
+  case 2: h ^= data[1] << 8; /* fall through */
+  case 1: h ^= data[0];
+          h *= m;
+          break;
+  default: break; // no trailing bytes
+  }
+
+  // Do a few final mixes of the hash to ensure the last few
+  // bytes are well-incorporated.
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+}
diff --git a/src/basic/MurmurHash2.h b/src/basic/MurmurHash2.h
new file mode 100644
index 0000000..1aef3af
--- /dev/null
+++ b/src/basic/MurmurHash2.h
@@ -0,0 +1,30 @@
+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#pragma once
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+// Returns the 32-bit MurmurHash2 of the 'len' bytes at 'key', mixed with 'seed'.
+uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
diff --git a/src/basic/af-list.c b/src/basic/af-list.c
new file mode 100644
index 0000000..abad221
--- /dev/null
+++ b/src/basic/af-list.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "af-list.h"
+#include "macro.h"
+
+static const struct af_name* lookup_af(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "af-from-name.h"
+#include "af-to-name.h"
+
+const char *af_to_name(int id) {
+        /* Only positive ids that fall inside the af_names[] table can be looked up. */
+        if (id <= 0 || (size_t) id >= ELEMENTSOF(af_names))
+                return NULL;
+
+        return af_names[id];
+}
+
+int af_from_name(const char *name) {
+        const struct af_name *entry;
+
+        assert(name);
+
+        /* Resolve the name through lookup_af(); an unknown name is reported as -EINVAL. */
+        entry = lookup_af(name, strlen(name));
+        return entry ? entry->id : -EINVAL;
+}
+
+/* Reports ELEMENTSOF(af_names), i.e. the first id that af_to_name() rejects as out of range. */
+int af_max(void) {
+ return ELEMENTSOF(af_names);
+}
diff --git a/src/basic/af-list.h b/src/basic/af-list.h
new file mode 100644
index 0000000..8342323
--- /dev/null
+++ b/src/basic/af-list.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/socket.h>
+
+#include "string-util.h"
+
+const char *af_to_name(int id);
+int af_from_name(const char *name);
+
+/* Short display form of an address family: "*" for AF_UNSPEC, the name without its "AF_" prefix for
+ * ids known to af_to_name(), and "unknown" for everything else. */
+static inline const char* af_to_name_short(int id) {
+        const char *full;
+
+        if (id == AF_UNSPEC)
+                return "*";
+
+        full = af_to_name(id);
+        if (full) {
+                /* Skip the three characters of the "AF_" prefix. */
+                assert(startswith(full, "AF_"));
+                return full + 3;
+        }
+
+        return "unknown";
+}
+
+int af_max(void);
diff --git a/src/basic/af-to-name.awk b/src/basic/af-to-name.awk
new file mode 100644
index 0000000..18d0a89
--- /dev/null
+++ b/src/basic/af-to-name.awk
@@ -0,0 +1,9 @@
+# Emits the C array af_names[]. The first field of each input line is used both
+# as the designated array index and as the string stored at that index. Lines
+# matching AF_FILE, AF_ROUTE or AF_LOCAL are skipped.
+BEGIN{
+ print "static const char* const af_names[] = { "
+}
+!/AF_FILE/ && !/AF_ROUTE/ && !/AF_LOCAL/ {
+ printf " [%s] = \"%s\",\n", $1, $1
+}
+END{
+ print "};"
+}
diff --git a/src/basic/alloc-util.c b/src/basic/alloc-util.c
new file mode 100644
index 0000000..ab7a42c
--- /dev/null
+++ b/src/basic/alloc-util.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "util.h"
+
+/* Returns a newly malloc()ed copy of the l bytes at p, or NULL if the allocation fails. For l == 0
+ * a 1-byte allocation is requested instead of a 0-byte one. The caller owns the result and releases
+ * it with free(). */
+void* memdup(const void *p, size_t l) {
+        void *ret;
+
+        assert(l == 0 || p);
+
+        ret = malloc(l ?: 1);
+        if (!ret)
+                return NULL;
+
+        /* p may legitimately be NULL when l == 0, and passing NULL to memcpy() is undefined behaviour
+         * even for a zero length, hence skip the copy in that case. */
+        if (l > 0)
+                memcpy(ret, p, l);
+
+        return ret;
+}
+
+/* Like memdup(), but allocates one byte more than requested and stores a NUL there. Returns NULL if
+ * the allocation fails or if l + 1 does not fit into size_t. */
+void* memdup_suffix0(const void *p, size_t l) {
+        uint8_t *ret;
+
+        assert(l == 0 || p);
+
+        /* The same as memdup() but place a safety NUL byte after the allocated memory */
+
+        if (l == SIZE_MAX) /* l + 1 would wrap around to 0 */
+                return NULL;
+
+        ret = malloc(l + 1);
+        if (!ret)
+                return NULL;
+
+        if (l > 0) /* p may be NULL if l == 0, and memcpy() must not be handed NULL */
+                memcpy(ret, p, l);
+
+        ret[l] = 0;
+        return ret;
+}
+
+/* Grows the heap array *p, whose elements are 'size' bytes each and which currently has room for
+ * *allocated of them, so that it can hold at least 'need' elements. Over-allocates (twice 'need',
+ * and never less than 64 bytes) so that repeated appends do not reallocate every time. On success
+ * returns the possibly moved array and updates *p and *allocated. Returns *p unchanged if the array
+ * is already large enough. Returns NULL if the size computation would overflow or realloc() fails;
+ * *p and *allocated are left untouched in that case. */
+void* greedy_realloc(void **p, size_t *allocated, size_t need, size_t size) {
+        size_t a, newalloc;
+        void *q;
+
+        assert(p);
+        assert(allocated);
+
+        if (*allocated >= need)
+                return *p;
+
+        if (need > SIZE_MAX / 2) /* need * 2 would overflow */
+                return NULL;
+
+        newalloc = need * 2;
+        if (size != 0 && newalloc > SIZE_MAX / size) /* newalloc * size would overflow */
+                return NULL;
+
+        a = newalloc * size;
+        if (a < 64) /* allocate at least 64 bytes */
+                a = 64;
+
+        q = realloc(*p, a);
+        if (!q)
+                return NULL;
+
+        if (size != 0) /* report the capacity gained from the 64 byte minimum; never divide by zero */
+                newalloc = a / size;
+
+        *p = q;
+        *allocated = newalloc;
+        return q;
+}
+
+/* Same as greedy_realloc(), but additionally zero-fills the elements the array gained. */
+void* greedy_realloc0(void **p, size_t *allocated, size_t need, size_t size) {
+        size_t old_count;
+        uint8_t *buf;
+
+        assert(p);
+        assert(allocated);
+
+        /* Remember the capacity before growing, so that we know where the new part begins. */
+        old_count = *allocated;
+
+        buf = greedy_realloc(p, allocated, need, size);
+        if (!buf)
+                return NULL;
+
+        if (*allocated <= old_count)
+                return buf;
+
+        memzero(buf + old_count * size, (*allocated - old_count) * size);
+        return buf;
+}
diff --git a/src/basic/alloc-util.h b/src/basic/alloc-util.h
new file mode 100644
index 0000000..893a123
--- /dev/null
+++ b/src/basic/alloc-util.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <alloca.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "macro.h"
+
+typedef void (*free_func_t)(void *p);
+
+/* If for some reason more than 4M are allocated on the stack, let's abort immediately. It's better than
+ * proceeding and smashing the stack limits. Note that by default RLIMIT_STACK is 8M on Linux. */
+#define ALLOCA_MAX (4U*1024U*1024U)
+
+/* Allocates an uninitialized array of n elements of type t through malloc_multiply(), which yields
+ * NULL if sizeof(t) * n would overflow. */
+#define new(t, n) ((t*) malloc_multiply(sizeof(t), (n)))
+
+/* Like new(), but zero-initialized through calloc(). A count of 0 is turned into 1. */
+#define new0(t, n) ((t*) calloc((n) ?: 1, sizeof(t)))
+
+/* Like new(), but the array is obtained from alloca(). Asserts that the byte size neither overflows
+ * nor exceeds ALLOCA_MAX. */
+#define newa(t, n) \
+ ({ \
+ size_t _n_ = n; \
+ assert(!size_multiply_overflow(sizeof(t), _n_)); \
+ assert(sizeof(t)*_n_ <= ALLOCA_MAX); \
+ (t*) alloca(sizeof(t)*_n_); \
+ })
+
+/* Like newa(), but the memory is zeroed (alloca0()). */
+#define newa0(t, n) \
+ ({ \
+ size_t _n_ = n; \
+ assert(!size_multiply_overflow(sizeof(t), _n_)); \
+ assert(sizeof(t)*_n_ <= ALLOCA_MAX); \
+ (t*) alloca0(sizeof(t)*_n_); \
+ })
+
+/* Duplicates the n elements of type t starting at p through memdup_multiply(). */
+#define newdup(t, p, n) ((t*) memdup_multiply(p, sizeof(t), (n)))
+
+/* Like newdup(), but through memdup_suffix0_multiply(), i.e. with a trailing NUL byte appended. */
+#define newdup_suffix0(t, p, n) ((t*) memdup_suffix0_multiply(p, sizeof(t), (n)))
+
+/* Allocates n zero-initialized bytes. */
+#define malloc0(n) (calloc(1, (n)))
+
+/* Frees 'memory' and returns NULL, so that a pointer can be released and reset in one expression,
+ * e.g. "p = mfree(p);". */
+static inline void *mfree(void *memory) {
+ free(memory);
+ return NULL;
+}
+
+/* Frees a, moves the pointer held in b over to a and resets b to NULL. Evaluates to 0. Both
+ * arguments are expanded more than once, hence they must not have side effects. */
+#define free_and_replace(a, b) \
+ ({ \
+ free(a); \
+ (a) = (b); \
+ (b) = NULL; \
+ 0; \
+ })
+
+void* memdup(const void *p, size_t l) _alloc_(2);
+void* memdup_suffix0(const void *p, size_t l) _alloc_(2);
+
+/* Copies the l bytes at p into memory obtained from alloca() and evaluates to that copy (the return
+ * value of memcpy()). Asserts that l does not exceed ALLOCA_MAX. */
+#define memdupa(p, l) \
+ ({ \
+ void *_q_; \
+ size_t _l_ = l; \
+ assert(_l_ <= ALLOCA_MAX); \
+ _q_ = alloca(_l_); \
+ memcpy(_q_, p, _l_); \
+ })
+
+/* Like memdupa(), but allocates one extra byte and sets it to NUL. Note that the ALLOCA_MAX check
+ * covers l only, not the extra byte. */
+#define memdupa_suffix0(p, l) \
+ ({ \
+ void *_q_; \
+ size_t _l_ = l; \
+ assert(_l_ <= ALLOCA_MAX); \
+ _q_ = alloca(_l_ + 1); \
+ ((uint8_t*) _q_)[_l_] = 0; \
+ memcpy(_q_, p, _l_); \
+ })
+
+/* Cleanup handler behind _cleanup_free_: p is the address of a pointer variable, and whatever that
+ * variable points to is released with free(). */
+static inline void freep(void *p) {
+ free(*(void**) p);
+}
+
+#define _cleanup_free_ _cleanup_(freep)
+
+/* Tells whether size * need is too large to be represented in a size_t. */
+static inline bool size_multiply_overflow(size_t size, size_t need) {
+        if (need == 0) /* a product with 0 never overflows, and we must not divide by it */
+                return false;
+
+        return _unlikely_(size > (SIZE_MAX / need));
+}
+
+/* malloc() for size * need bytes. Returns NULL if the product overflows; a product of 0 is turned
+ * into a 1-byte request. */
+_malloc_ _alloc_(1, 2) static inline void *malloc_multiply(size_t size, size_t need) {
+        return size_multiply_overflow(size, need) ? NULL : malloc(size * need ?: 1);
+}
+
+#if !HAVE_REALLOCARRAY
+/* Fallback used when HAVE_REALLOCARRAY is not set: realloc() to size * need bytes, returning NULL
+ * if the product overflows. A product of 0 is turned into a 1-byte request. */
+_alloc_(2, 3) static inline void *reallocarray(void *p, size_t need, size_t size) {
+        return size_multiply_overflow(size, need) ? NULL : realloc(p, size * need ?: 1);
+}
+#endif
+
+/* memdup() for size * need bytes. Returns NULL if the product overflows. */
+_alloc_(2, 3) static inline void *memdup_multiply(const void *p, size_t size, size_t need) {
+        return size_multiply_overflow(size, need) ? NULL : memdup(p, size * need);
+}
+
+/* memdup_suffix0() for size * need bytes. Returns NULL if the product overflows. */
+_alloc_(2, 3) static inline void *memdup_suffix0_multiply(const void *p, size_t size, size_t need) {
+        return size_multiply_overflow(size, need) ? NULL : memdup_suffix0(p, size * need);
+}
+
+void* greedy_realloc(void **p, size_t *allocated, size_t need, size_t size);
+void* greedy_realloc0(void **p, size_t *allocated, size_t need, size_t size);
+
+/* Typed front-end for greedy_realloc(): 'array' and 'allocated' must be lvalues (their addresses
+ * are taken), and the element size is derived from (array)[0]. */
+#define GREEDY_REALLOC(array, allocated, need) \
+ greedy_realloc((void**) &(array), &(allocated), (need), sizeof((array)[0]))
+
+/* Same, for greedy_realloc0(). */
+#define GREEDY_REALLOC0(array, allocated, need) \
+ greedy_realloc0((void**) &(array), &(allocated), (need), sizeof((array)[0]))
+
+/* alloca() of n bytes, zero-filled. Asserts that n does not exceed ALLOCA_MAX. */
+#define alloca0(n) \
+ ({ \
+ char *_new_; \
+ size_t _len_ = n; \
+ assert(_len_ <= ALLOCA_MAX); \
+ _new_ = alloca(_len_); \
+ (void *) memset(_new_, 0, _len_); \
+ })
+
+/* It's not clear what alignment glibc/gcc alloca() guarantee, hence provide a guaranteed safe version.
+ * Over-allocates by align - 1 bytes and rounds the pointer up with a bit mask, which only works if
+ * 'align' is a power of two. The ALLOCA_MAX check covers 'size' only, not the padding. */
+#define alloca_align(size, align) \
+ ({ \
+ void *_ptr_; \
+ size_t _mask_ = (align) - 1; \
+ size_t _size_ = size; \
+ assert(_size_ <= ALLOCA_MAX); \
+ _ptr_ = alloca(_size_ + _mask_); \
+ (void*)(((uintptr_t)_ptr_ + _mask_) & ~_mask_); \
+ })
+
+/* alloca_align(), with the returned 'size' bytes zero-filled. */
+#define alloca0_align(size, align) \
+ ({ \
+ void *_new_; \
+ size_t _xsize_ = (size); \
+ _new_ = alloca_align(_xsize_, (align)); \
+ (void*)memset(_new_, 0, _xsize_); \
+ })
+
+/* Takes inspiration from Rust's Option::take() method: reads and returns a pointer, but at the same time resets it to
+ * NULL. See: https://doc.rust-lang.org/std/option/enum.Option.html#method.take
+ * 'ptr' is expanded more than once and must be an lvalue without side effects. */
+#define TAKE_PTR(ptr) \
+ ({ \
+ typeof(ptr) _ptr_ = (ptr); \
+ (ptr) = NULL; \
+ _ptr_; \
+ })
diff --git a/src/basic/architecture.c b/src/basic/architecture.c
new file mode 100644
index 0000000..85837b5
--- /dev/null
+++ b/src/basic/architecture.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/utsname.h>
+
+#include "architecture.h"
+#include "macro.h"
+#include "string-table.h"
+#include "string-util.h"
+
+/* Maps the machine string reported by uname() to one of the ARCHITECTURE_* constants. Only the
+ * machine names belonging to the architecture family this file is compiled for are known. A
+ * successful lookup is cached in a static variable and returned directly by later calls. Asserts
+ * that uname() succeeds and that the reported machine is in the table. */
+int uname_architecture(void) {
+
+        /* Return a sanitized enum identifying the architecture we are
+         * running on. This is based on uname(), and the user may
+         * hence control what this returns by using
+         * personality(). This puts the user in control on systems
+         * that can run binaries of multiple architectures.
+         *
+         * We do not translate the string returned by uname()
+         * 1:1. Instead we try to clean it up and break down the
+         * confusion on x86 and arm in particular.
+         *
+         * We do not try to distinguish CPUs nor CPU features, but
+         * actual architectures, i.e. that have genuinely different
+         * code. */
+
+        static const struct {
+                const char *machine;
+                int arch;
+        } arch_map[] = {
+#if defined(__x86_64__) || defined(__i386__)
+                { "x86_64", ARCHITECTURE_X86_64 },
+                { "i686", ARCHITECTURE_X86 },
+                { "i586", ARCHITECTURE_X86 },
+                { "i486", ARCHITECTURE_X86 },
+                { "i386", ARCHITECTURE_X86 },
+#elif defined(__powerpc__) || defined(__powerpc64__)
+                { "ppc64", ARCHITECTURE_PPC64 },
+                { "ppc64le", ARCHITECTURE_PPC64_LE },
+                { "ppc", ARCHITECTURE_PPC },
+                { "ppcle", ARCHITECTURE_PPC_LE },
+#elif defined(__ia64__)
+                { "ia64", ARCHITECTURE_IA64 },
+#elif defined(__hppa__) || defined(__hppa64__)
+                { "parisc64", ARCHITECTURE_PARISC64 },
+                { "parisc", ARCHITECTURE_PARISC },
+#elif defined(__s390__) || defined(__s390x__)
+                { "s390x", ARCHITECTURE_S390X },
+                { "s390", ARCHITECTURE_S390 },
+#elif defined(__sparc__)
+                { "sparc64", ARCHITECTURE_SPARC64 },
+                { "sparc", ARCHITECTURE_SPARC },
+#elif defined(__mips__) || defined(__mips64__)
+                { "mips64", ARCHITECTURE_MIPS64 },
+                { "mips", ARCHITECTURE_MIPS },
+#elif defined(__alpha__)
+                { "alpha", ARCHITECTURE_ALPHA },
+#elif defined(__arm__) || defined(__aarch64__)
+                /* Each machine name is listed exactly once; the lookup below returns the first match. */
+                { "aarch64", ARCHITECTURE_ARM64 },
+                { "aarch64_be", ARCHITECTURE_ARM64_BE },
+                { "armv4l", ARCHITECTURE_ARM },
+                { "armv4b", ARCHITECTURE_ARM_BE },
+                { "armv4tl", ARCHITECTURE_ARM },
+                { "armv4tb", ARCHITECTURE_ARM_BE },
+                { "armv5tl", ARCHITECTURE_ARM },
+                { "armv5tb", ARCHITECTURE_ARM_BE },
+                { "armv5tel", ARCHITECTURE_ARM },
+                { "armv5teb", ARCHITECTURE_ARM_BE },
+                { "armv5tejl", ARCHITECTURE_ARM },
+                { "armv5tejb", ARCHITECTURE_ARM_BE },
+                { "armv6l", ARCHITECTURE_ARM },
+                { "armv6b", ARCHITECTURE_ARM_BE },
+                { "armv7l", ARCHITECTURE_ARM },
+                { "armv7b", ARCHITECTURE_ARM_BE },
+                { "armv7ml", ARCHITECTURE_ARM },
+                { "armv7mb", ARCHITECTURE_ARM_BE },
+                { "armv8l", ARCHITECTURE_ARM },
+                { "armv8b", ARCHITECTURE_ARM_BE },
+#elif defined(__sh__) || defined(__sh64__)
+                { "sh5", ARCHITECTURE_SH64 },
+                { "sh2", ARCHITECTURE_SH },
+                { "sh2a", ARCHITECTURE_SH },
+                { "sh3", ARCHITECTURE_SH },
+                { "sh4", ARCHITECTURE_SH },
+                { "sh4a", ARCHITECTURE_SH },
+#elif defined(__m68k__)
+                { "m68k", ARCHITECTURE_M68K },
+#elif defined(__tilegx__)
+                { "tilegx", ARCHITECTURE_TILEGX },
+#elif defined(__cris__)
+                { "crisv32", ARCHITECTURE_CRIS },
+#elif defined(__nios2__)
+                { "nios2", ARCHITECTURE_NIOS2 },
+#elif defined(__riscv__) || defined(__riscv)
+                /* __riscv__ is obsolete, remove in 2018 */
+                { "riscv32", ARCHITECTURE_RISCV32 },
+                { "riscv64", ARCHITECTURE_RISCV64 },
+# if __SIZEOF_POINTER__ == 4
+                { "riscv", ARCHITECTURE_RISCV32 },
+# elif __SIZEOF_POINTER__ == 8
+                { "riscv", ARCHITECTURE_RISCV64 },
+# endif
+#elif defined(__arc__)
+                { "arc", ARCHITECTURE_ARC },
+                { "arceb", ARCHITECTURE_ARC_BE },
+#else
+#error "Please register your architecture here!"
+#endif
+        };
+
+        static int cached = _ARCHITECTURE_INVALID;
+        struct utsname u;
+        unsigned i;
+
+        if (cached != _ARCHITECTURE_INVALID)
+                return cached;
+
+        assert_se(uname(&u) >= 0);
+
+        for (i = 0; i < ELEMENTSOF(arch_map); i++)
+                if (streq(arch_map[i].machine, u.machine))
+                        return cached = arch_map[i].arch;
+
+        assert_not_reached("Couldn't identify architecture. You need to patch systemd.");
+        return _ARCHITECTURE_INVALID;
+}
+
+/* String name of every ARCHITECTURE_* value, indexed by that value. DEFINE_STRING_TABLE_LOOKUP()
+ * below presumably expands to the architecture_to_string()/architecture_from_string() functions
+ * declared in architecture.h -- the macro itself lives in string-table.h. */
+static const char *const architecture_table[_ARCHITECTURE_MAX] = {
+ [ARCHITECTURE_X86] = "x86",
+ [ARCHITECTURE_X86_64] = "x86-64",
+ [ARCHITECTURE_PPC] = "ppc",
+ [ARCHITECTURE_PPC_LE] = "ppc-le",
+ [ARCHITECTURE_PPC64] = "ppc64",
+ [ARCHITECTURE_PPC64_LE] = "ppc64-le",
+ [ARCHITECTURE_IA64] = "ia64",
+ [ARCHITECTURE_PARISC] = "parisc",
+ [ARCHITECTURE_PARISC64] = "parisc64",
+ [ARCHITECTURE_S390] = "s390",
+ [ARCHITECTURE_S390X] = "s390x",
+ [ARCHITECTURE_SPARC] = "sparc",
+ [ARCHITECTURE_SPARC64] = "sparc64",
+ [ARCHITECTURE_MIPS] = "mips",
+ [ARCHITECTURE_MIPS_LE] = "mips-le",
+ [ARCHITECTURE_MIPS64] = "mips64",
+ [ARCHITECTURE_MIPS64_LE] = "mips64-le",
+ [ARCHITECTURE_ALPHA] = "alpha",
+ [ARCHITECTURE_ARM] = "arm",
+ [ARCHITECTURE_ARM_BE] = "arm-be",
+ [ARCHITECTURE_ARM64] = "arm64",
+ [ARCHITECTURE_ARM64_BE] = "arm64-be",
+ [ARCHITECTURE_SH] = "sh",
+ [ARCHITECTURE_SH64] = "sh64",
+ [ARCHITECTURE_M68K] = "m68k",
+ [ARCHITECTURE_TILEGX] = "tilegx",
+ [ARCHITECTURE_CRIS] = "cris",
+ [ARCHITECTURE_NIOS2] = "nios2",
+ [ARCHITECTURE_RISCV32] = "riscv32",
+ [ARCHITECTURE_RISCV64] = "riscv64",
+ [ARCHITECTURE_ARC] = "arc",
+ [ARCHITECTURE_ARC_BE] = "arc-be",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(architecture, int);
diff --git a/src/basic/architecture.h b/src/basic/architecture.h
new file mode 100644
index 0000000..443e890
--- /dev/null
+++ b/src/basic/architecture.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <endian.h>
+
+#include "macro.h"
+#include "util.h"
+
+/* A cleaned up architecture definition. We don't want to get lost in
+ * processor features, models, generations or even ABIs. Hence we
+ * focus on general family, and distinguish word width and
+ * endianness. */
+
+/* Architecture identifiers as returned by uname_architecture() and native_architecture(). */
+enum {
+ ARCHITECTURE_X86 = 0,
+ ARCHITECTURE_X86_64,
+ ARCHITECTURE_PPC,
+ ARCHITECTURE_PPC_LE,
+ ARCHITECTURE_PPC64,
+ ARCHITECTURE_PPC64_LE,
+ ARCHITECTURE_IA64,
+ ARCHITECTURE_PARISC,
+ ARCHITECTURE_PARISC64,
+ ARCHITECTURE_S390,
+ ARCHITECTURE_S390X,
+ ARCHITECTURE_SPARC,
+ ARCHITECTURE_SPARC64,
+ ARCHITECTURE_MIPS,
+ ARCHITECTURE_MIPS_LE,
+ ARCHITECTURE_MIPS64,
+ ARCHITECTURE_MIPS64_LE,
+ ARCHITECTURE_ALPHA,
+ ARCHITECTURE_ARM,
+ ARCHITECTURE_ARM_BE,
+ ARCHITECTURE_ARM64,
+ ARCHITECTURE_ARM64_BE,
+ ARCHITECTURE_SH,
+ ARCHITECTURE_SH64,
+ ARCHITECTURE_M68K,
+ ARCHITECTURE_TILEGX,
+ ARCHITECTURE_CRIS,
+ ARCHITECTURE_NIOS2,
+ ARCHITECTURE_RISCV32,
+ ARCHITECTURE_RISCV64,
+ ARCHITECTURE_ARC,
+ ARCHITECTURE_ARC_BE,
+ _ARCHITECTURE_MAX, /* number of architectures listed above */
+ _ARCHITECTURE_INVALID = -1 /* "no architecture" marker */
+};
+
+int uname_architecture(void);
+
+/*
+ * LIB_ARCH_TUPLE should resolve to the local library path
+ * architecture tuple systemd is built for, according to the Debian
+ * tuple list:
+ *
+ * https://wiki.debian.org/Multiarch/Tuples
+ *
+ * This is used in library search paths that should understand
+ * Debian's paths on all distributions.
+ */
+
+#if defined(__x86_64__)
+# define native_architecture() ARCHITECTURE_X86_64
+# if defined(__ILP32__)
+# define LIB_ARCH_TUPLE "x86_64-linux-gnux32"
+# else
+# define LIB_ARCH_TUPLE "x86_64-linux-gnu"
+# endif
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_X86
+#elif defined(__i386__)
+# define native_architecture() ARCHITECTURE_X86
+# define LIB_ARCH_TUPLE "i386-linux-gnu"
+#elif defined(__powerpc64__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_PPC64
+# define LIB_ARCH_TUPLE "ppc64-linux-gnu"
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_PPC
+# else
+# define native_architecture() ARCHITECTURE_PPC64_LE
+# define LIB_ARCH_TUPLE "powerpc64le-linux-gnu"
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_PPC_LE
+# endif
+#elif defined(__powerpc__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_PPC
+# if defined(__NO_FPRS__)
+# define LIB_ARCH_TUPLE "powerpc-linux-gnuspe"
+# else
+# define LIB_ARCH_TUPLE "powerpc-linux-gnu"
+# endif
+# else
+# define native_architecture() ARCHITECTURE_PPC_LE
+# error "Missing LIB_ARCH_TUPLE for PPCLE"
+# endif
+#elif defined(__ia64__)
+# define native_architecture() ARCHITECTURE_IA64
+# define LIB_ARCH_TUPLE "ia64-linux-gnu"
+#elif defined(__hppa64__)
+# define native_architecture() ARCHITECTURE_PARISC64
+# error "Missing LIB_ARCH_TUPLE for HPPA64"
+#elif defined(__hppa__)
+# define native_architecture() ARCHITECTURE_PARISC
+# define LIB_ARCH_TUPLE "hppa-linux-gnu" /* plain ASCII hyphens, like every other tuple */
+#elif defined(__s390x__)
+# define native_architecture() ARCHITECTURE_S390X
+# define LIB_ARCH_TUPLE "s390x-linux-gnu"
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_S390
+#elif defined(__s390__)
+# define native_architecture() ARCHITECTURE_S390
+# define LIB_ARCH_TUPLE "s390-linux-gnu"
+#elif defined(__sparc__) && defined (__arch64__)
+# define native_architecture() ARCHITECTURE_SPARC64
+# define LIB_ARCH_TUPLE "sparc64-linux-gnu"
+#elif defined(__sparc__)
+# define native_architecture() ARCHITECTURE_SPARC
+# define LIB_ARCH_TUPLE "sparc-linux-gnu"
+#elif defined(__mips64) && defined(__LP64__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_MIPS64
+# define LIB_ARCH_TUPLE "mips64-linux-gnuabi64"
+# else
+# define native_architecture() ARCHITECTURE_MIPS64_LE
+# define LIB_ARCH_TUPLE "mips64el-linux-gnuabi64"
+# endif
+#elif defined(__mips64)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_MIPS64
+# define LIB_ARCH_TUPLE "mips64-linux-gnuabin32"
+# else
+# define native_architecture() ARCHITECTURE_MIPS64_LE
+# define LIB_ARCH_TUPLE "mips64el-linux-gnuabin32"
+# endif
+#elif defined(__mips__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_MIPS
+# define LIB_ARCH_TUPLE "mips-linux-gnu"
+# else
+# define native_architecture() ARCHITECTURE_MIPS_LE
+# define LIB_ARCH_TUPLE "mipsel-linux-gnu"
+# endif
+#elif defined(__alpha__)
+# define native_architecture() ARCHITECTURE_ALPHA
+# define LIB_ARCH_TUPLE "alpha-linux-gnu"
+#elif defined(__aarch64__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_ARM64_BE
+# define LIB_ARCH_TUPLE "aarch64_be-linux-gnu"
+# else
+# define native_architecture() ARCHITECTURE_ARM64
+# define LIB_ARCH_TUPLE "aarch64-linux-gnu"
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_ARM
+# endif
+#elif defined(__arm__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_ARM_BE
+# if defined(__ARM_EABI__)
+# if defined(__ARM_PCS_VFP)
+# define LIB_ARCH_TUPLE "armeb-linux-gnueabihf"
+# else
+# define LIB_ARCH_TUPLE "armeb-linux-gnueabi"
+# endif
+# else
+# define LIB_ARCH_TUPLE "armeb-linux-gnu"
+# endif
+# else
+# define native_architecture() ARCHITECTURE_ARM
+# if defined(__ARM_EABI__)
+# if defined(__ARM_PCS_VFP)
+# define LIB_ARCH_TUPLE "arm-linux-gnueabihf"
+# else
+# define LIB_ARCH_TUPLE "arm-linux-gnueabi"
+# endif
+# else
+# define LIB_ARCH_TUPLE "arm-linux-gnu"
+# endif
+# endif
+#elif defined(__sh64__)
+# define native_architecture() ARCHITECTURE_SH64
+# error "Missing LIB_ARCH_TUPLE for SH64"
+#elif defined(__sh__)
+# define native_architecture() ARCHITECTURE_SH
+# if defined(__SH1__)
+# define LIB_ARCH_TUPLE "sh1-linux-gnu"
+# elif defined(__SH2__)
+# define LIB_ARCH_TUPLE "sh2-linux-gnu"
+# elif defined(__SH2A__)
+# define LIB_ARCH_TUPLE "sh2a-linux-gnu"
+# elif defined(__SH2E__)
+# define LIB_ARCH_TUPLE "sh2e-linux-gnu"
+# elif defined(__SH3__)
+# define LIB_ARCH_TUPLE "sh3-linux-gnu"
+# elif defined(__SH3E__)
+# define LIB_ARCH_TUPLE "sh3e-linux-gnu"
+# elif defined(__SH4__) && !defined(__SH4A__)
+# define LIB_ARCH_TUPLE "sh4-linux-gnu"
+# elif defined(__SH4A__)
+# define LIB_ARCH_TUPLE "sh4a-linux-gnu"
+# endif
+#elif defined(__m68k__)
+# define native_architecture() ARCHITECTURE_M68K
+# define LIB_ARCH_TUPLE "m68k-linux-gnu"
+#elif defined(__tilegx__)
+# define native_architecture() ARCHITECTURE_TILEGX
+# define LIB_ARCH_TUPLE "tilegx-linux-gnu"
+#elif defined(__cris__)
+# define native_architecture() ARCHITECTURE_CRIS
+# error "Missing LIB_ARCH_TUPLE for CRIS"
+#elif defined(__nios2__)
+# define native_architecture() ARCHITECTURE_NIOS2
+# define LIB_ARCH_TUPLE "nios2-linux-gnu"
+#elif defined(__riscv__) || defined(__riscv)
+ /* __riscv__ is obsolete, remove in 2018 */
+# if __SIZEOF_POINTER__ == 4
+# define native_architecture() ARCHITECTURE_RISCV32
+# define LIB_ARCH_TUPLE "riscv32-linux-gnu"
+# elif __SIZEOF_POINTER__ == 8
+# define native_architecture() ARCHITECTURE_RISCV64
+# define LIB_ARCH_TUPLE "riscv64-linux-gnu"
+# else
+# error "Unrecognized riscv architecture variant"
+# endif
+#elif defined(__arc__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_ARC_BE
+# define LIB_ARCH_TUPLE "arceb-linux"
+# else
+# define native_architecture() ARCHITECTURE_ARC
+# define LIB_ARCH_TUPLE "arc-linux"
+# endif
+#else
+# error "Please register your architecture here!"
+#endif
+
+const char *architecture_to_string(int a) _const_;
+int architecture_from_string(const char *s) _pure_;
diff --git a/src/basic/arphrd-list.c b/src/basic/arphrd-list.c
new file mode 100644
index 0000000..b6e2486
--- /dev/null
+++ b/src/basic/arphrd-list.c
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <net/if_arp.h>
+#include <string.h>
+
+#include "arphrd-list.h"
+#include "macro.h"
+#include "missing_network.h"
+
+static const struct arphrd_name* lookup_arphrd(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "arphrd-from-name.h"
+#include "arphrd-to-name.h"
+
+const char *arphrd_to_name(int id) {
+
+ if (id <= 0)
+ return NULL;
+
+ if ((size_t) id >= ELEMENTSOF(arphrd_names))
+ return NULL;
+
+ return arphrd_names[id];
+}
+
+int arphrd_from_name(const char *name) {
+ const struct arphrd_name *sc;
+
+ assert(name);
+
+ sc = lookup_arphrd(name, strlen(name));
+ if (!sc)
+ return -EINVAL;
+
+ return sc->id;
+}
+
+int arphrd_max(void) {
+ return ELEMENTSOF(arphrd_names);
+}
diff --git a/src/basic/arphrd-list.h b/src/basic/arphrd-list.h
new file mode 100644
index 0000000..5dcfe5e
--- /dev/null
+++ b/src/basic/arphrd-list.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+const char *arphrd_to_name(int id);
+int arphrd_from_name(const char *name);
+
+int arphrd_max(void);
diff --git a/src/basic/arphrd-to-name.awk b/src/basic/arphrd-to-name.awk
new file mode 100644
index 0000000..5a35673
--- /dev/null
+++ b/src/basic/arphrd-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{
+ print "static const char* const arphrd_names[] = { "
+}
+!/CISCO/ {
+ printf " [ARPHRD_%s] = \"%s\",\n", $1, $1
+}
+END{
+ print "};"
+}
diff --git a/src/basic/async.c b/src/basic/async.c
new file mode 100644
index 0000000..c45ca01
--- /dev/null
+++ b/src/basic/async.c
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <unistd.h>
+
+#include "async.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "util.h"
+
+int asynchronous_job(void* (*func)(void *p), void *arg) {
+ sigset_t ss, saved_ss;
+ pthread_attr_t a;
+ pthread_t t;
+ int r, k;
+
+ /* It kinda sucks that we have to resort to threads to implement an asynchronous close(), but well, such is
+ * life. */
+
+ r = pthread_attr_init(&a);
+ if (r > 0)
+ return -r;
+
+ r = pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED);
+ if (r > 0) {
+ r = -r;
+ goto finish;
+ }
+
+ assert_se(sigfillset(&ss) >= 0);
+
+ /* Block all signals before forking off the thread, so that the new thread is started with all signals
+ * blocked. This way the existence of the new thread won't affect signal handling in other threads. */
+
+ r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss);
+ if (r > 0) {
+ r = -r;
+ goto finish;
+ }
+
+ r = pthread_create(&t, &a, func, arg);
+
+ k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL);
+
+ if (r > 0)
+ r = -r;
+ else if (k > 0)
+ r = -k;
+ else
+ r = 0;
+
+finish:
+ pthread_attr_destroy(&a);
+ return r;
+}
+
+int asynchronous_sync(pid_t *ret_pid) {
+ int r;
+
+ /* This forks off an invocation of sync() as a child process, in order to initiate synchronization to
+ * disk. Note that we implement this as a helper process rather than a thread as we don't want the sync() to hang our
+ * original process ever, and a thread would do that as the process can't exit with threads hanging in blocking
+ * syscalls. */
+
+ r = safe_fork("(sd-sync)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS, ret_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child process */
+ (void) sync();
+ _exit(EXIT_SUCCESS);
+ }
+
+ return 0;
+}
+
+static void *close_thread(void *p) {
+ (void) pthread_setname_np(pthread_self(), "close");
+
+ assert_se(close_nointr(PTR_TO_FD(p)) != -EBADF);
+ return NULL;
+}
+
+int asynchronous_close(int fd) {
+ int r;
+
+ /* This is supposed to behave similarly to safe_close(), but
+ * actually invoke close() asynchronously, so that it will
+ * never block. Ideally the kernel would have an API for this,
+ * but it doesn't, so we work around it, and hide this as
+ * far away as we can. */
+
+ if (fd >= 0) {
+ PROTECT_ERRNO;
+
+ r = asynchronous_job(close_thread, FD_TO_PTR(fd));
+ if (r < 0)
+ assert_se(close_nointr(fd) != -EBADF);
+ }
+
+ return -1;
+}
diff --git a/src/basic/async.h b/src/basic/async.h
new file mode 100644
index 0000000..3160613
--- /dev/null
+++ b/src/basic/async.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int asynchronous_job(void* (*func)(void *p), void *arg);
+
+int asynchronous_sync(pid_t *ret_pid);
+int asynchronous_close(int fd);
diff --git a/src/basic/audit-util.c b/src/basic/audit-util.c
new file mode 100644
index 0000000..5cbaef3
--- /dev/null
+++ b/src/basic/audit-util.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <linux/netlink.h>
+#include <stdio.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "user-util.h"
+
+int audit_session_from_pid(pid_t pid, uint32_t *id) {
+ _cleanup_free_ char *s = NULL;
+ const char *p;
+ uint32_t u;
+ int r;
+
+ assert(id);
+
+ /* We don't convert ENOENT to ESRCH here, since we can't
+ * really distinguish between "audit is not available in the
+ * kernel" and "the process does not exist", both of which will
+ * result in ENOENT. */
+
+ p = procfs_file_alloca(pid, "sessionid");
+
+ r = read_one_line_file(p, &s);
+ if (r < 0)
+ return r;
+
+ r = safe_atou32(s, &u);
+ if (r < 0)
+ return r;
+
+ if (!audit_session_is_valid(u))
+ return -ENODATA;
+
+ *id = u;
+ return 0;
+}
+
+int audit_loginuid_from_pid(pid_t pid, uid_t *uid) {
+ _cleanup_free_ char *s = NULL;
+ const char *p;
+ uid_t u;
+ int r;
+
+ assert(uid);
+
+ p = procfs_file_alloca(pid, "loginuid");
+
+ r = read_one_line_file(p, &s);
+ if (r < 0)
+ return r;
+
+ r = parse_uid(s, &u);
+ if (r == -ENXIO) /* the UID was -1 */
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ *uid = u;
+ return 0;
+}
+
+bool use_audit(void) {
+ static int cached_use = -1;
+
+ if (cached_use < 0) {
+ int fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_AUDIT);
+ if (fd < 0) {
+ cached_use = !IN_SET(errno, EAFNOSUPPORT, EPROTONOSUPPORT, EPERM);
+ if (!cached_use)
+ log_debug_errno(errno, "Won't talk to audit: %m");
+ } else {
+ cached_use = true;
+ safe_close(fd);
+ }
+ }
+
+ return cached_use;
+}
diff --git a/src/basic/audit-util.h b/src/basic/audit-util.h
new file mode 100644
index 0000000..c9fc498
--- /dev/null
+++ b/src/basic/audit-util.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#define AUDIT_SESSION_INVALID ((uint32_t) -1)
+
+int audit_session_from_pid(pid_t pid, uint32_t *id);
+int audit_loginuid_from_pid(pid_t pid, uid_t *uid);
+
+bool use_audit(void);
+
+static inline bool audit_session_is_valid(uint32_t id) {
+ return id > 0 && id != AUDIT_SESSION_INVALID;
+}
diff --git a/src/basic/blockdev-util.c b/src/basic/blockdev-util.c
new file mode 100644
index 0000000..3017ecd
--- /dev/null
+++ b/src/basic/blockdev-util.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/stat.h>
+#include <sys/statfs.h>
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "stat-util.h"
+
+int block_get_whole_disk(dev_t d, dev_t *ret) {
+ char p[SYS_BLOCK_PATH_MAX("/partition")];
+ _cleanup_free_ char *s = NULL;
+ dev_t devt;
+ int r;
+
+ assert(ret);
+
+ /* If it has a queue this is good enough for us */
+ xsprintf_sys_block_path(p, "/queue", d);
+ if (access(p, F_OK) >= 0) {
+ *ret = d;
+ return 0;
+ }
+
+ /* If it is a partition find the originating device */
+ xsprintf_sys_block_path(p, "/partition", d);
+ if (access(p, F_OK) < 0)
+ return -ENOENT;
+
+ /* Get parent dev_t */
+ xsprintf_sys_block_path(p, "/../dev", d);
+ r = read_one_line_file(p, &s);
+ if (r < 0)
+ return r;
+
+ r = parse_dev(s, &devt);
+ if (r < 0)
+ return r;
+
+ /* Only return this if it is really good enough for us. */
+ xsprintf_sys_block_path(p, "/queue", devt);
+ if (access(p, F_OK) < 0)
+ return -ENOENT;
+
+ *ret = devt;
+ return 0;
+}
+
+int get_block_device(const char *path, dev_t *dev) {
+ struct stat st;
+ struct statfs sfs;
+
+ assert(path);
+ assert(dev);
+
+ /* Gets the block device directly backing a file system. If
+ * the block device is encrypted, returns the device mapper
+ * block device. Returns 1 if found, 0 (with *dev = 0) if not. */
+
+ if (lstat(path, &st) < 0)
+ return -errno;
+
+ if (major(st.st_dev) != 0) {
+ *dev = st.st_dev;
+ return 1;
+ }
+
+ if (statfs(path, &sfs) < 0)
+ return -errno;
+
+ if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC))
+ return btrfs_get_block_device(path, dev);
+
+ *dev = 0;
+ return 0;
+}
+
+int block_get_originating(dev_t dt, dev_t *ret) {
+ _cleanup_closedir_ DIR *d = NULL;
+ _cleanup_free_ char *t = NULL;
+ char p[SYS_BLOCK_PATH_MAX("/slaves")];
+ struct dirent *de, *found = NULL;
+ const char *q;
+ dev_t devt;
+ int r;
+
+ /* For the specified block device tries to chase it through the layers, in case LUKS-style DM stacking is used,
+ * trying to find the next underlying layer. */
+
+ xsprintf_sys_block_path(p, "/slaves", dt);
+ d = opendir(p);
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN))
+ continue;
+
+ if (found) {
+ _cleanup_free_ char *u = NULL, *v = NULL, *a = NULL, *b = NULL;
+
+ /* We found a device backed by multiple other devices. We don't really support automatic
+ * discovery on such setups, with the exception of dm-verity partitions. In this case there are
+ * two backing devices: the data partition and the hash partition. We are fine with such
+ * setups, however, only if both partitions are on the same physical device. Hence, let's
+ * verify this. */
+
+ u = strjoin(p, "/", de->d_name, "/../dev");
+ if (!u)
+ return -ENOMEM;
+
+ v = strjoin(p, "/", found->d_name, "/../dev");
+ if (!v)
+ return -ENOMEM;
+
+ r = read_one_line_file(u, &a);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read %s: %m", u);
+
+ r = read_one_line_file(v, &b);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read %s: %m", v);
+
+ /* Check if the parent device is the same. If not, then the two backing devices are on
+ * different physical devices, and we don't support that. */
+ if (!streq(a, b))
+ return -ENOTUNIQ;
+ }
+
+ found = de;
+ }
+
+ if (!found)
+ return -ENOENT;
+
+ q = strjoina(p, "/", found->d_name, "/dev");
+
+ r = read_one_line_file(q, &t);
+ if (r < 0)
+ return r;
+
+ r = parse_dev(t, &devt);
+ if (r < 0)
+ return -EINVAL;
+
+ if (major(devt) == 0)
+ return -ENOENT;
+
+ *ret = devt;
+ return 1;
+}
+
+int get_block_device_harder(const char *path, dev_t *ret) {
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ /* Gets the backing block device for a file system, and handles LUKS encrypted file systems, looking for its
+ * immediate parent, if there is one. */
+
+ r = get_block_device(path, ret);
+ if (r <= 0)
+ return r;
+
+ r = block_get_originating(*ret, ret);
+ if (r < 0)
+ log_debug_errno(r, "Failed to chase block device '%s', ignoring: %m", path);
+
+ return 1;
+}
diff --git a/src/basic/blockdev-util.h b/src/basic/blockdev-util.h
new file mode 100644
index 0000000..6d8a796
--- /dev/null
+++ b/src/basic/blockdev-util.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+#include "macro.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+#define SYS_BLOCK_PATH_MAX(suffix) \
+ (STRLEN("/sys/dev/block/") + DECIMAL_STR_MAX(dev_t) + 1 + DECIMAL_STR_MAX(dev_t) + strlen_ptr(suffix))
+#define xsprintf_sys_block_path(buf, suffix, devno) \
+ xsprintf(buf, "/sys/dev/block/%u:%u%s", major(devno), minor(devno), strempty(suffix))
+
+int block_get_whole_disk(dev_t d, dev_t *ret);
+int block_get_originating(dev_t d, dev_t *ret);
+
+int get_block_device(const char *path, dev_t *dev);
+
+int get_block_device_harder(const char *path, dev_t *dev);
diff --git a/src/basic/btrfs-util.c b/src/basic/btrfs-util.c
new file mode 100644
index 0000000..da4dd2a
--- /dev/null
+++ b/src/basic/btrfs-util.c
@@ -0,0 +1,1993 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/sysmacros.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "copy.h"
+#include "device-nodes.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "smack-util.h"
+#include "sparse-endian.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "util.h"
+
+/* WARNING: Be careful with file system ioctls! When we get an fd, we
+ * need to make sure it either refers to only a regular file or
+ * directory, or that it is located on btrfs, before invoking any
+ * btrfs ioctls. The ioctl numbers are reused by some device drivers
+ * (such as DRM), and hence might have bad effects when invoked on
+ * device nodes (that reference drivers) rather than fds to normal
+ * files or directories. */
+
+static int validate_subvolume_name(const char *name) {
+
+ if (!filename_is_valid(name))
+ return -EINVAL;
+
+ if (strlen(name) > BTRFS_SUBVOL_NAME_MAX)
+ return -E2BIG;
+
+ return 0;
+}
+
+static int extract_subvolume_name(const char *path, const char **subvolume) {
+ const char *fn;
+ int r;
+
+ assert(path);
+ assert(subvolume);
+
+ fn = basename(path);
+
+ r = validate_subvolume_name(fn);
+ if (r < 0)
+ return r;
+
+ *subvolume = fn;
+ return 0;
+}
+
+int btrfs_is_filesystem(int fd) {
+ struct statfs sfs;
+
+ assert(fd >= 0);
+
+ if (fstatfs(fd, &sfs) < 0)
+ return -errno;
+
+ return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
+}
+
+int btrfs_is_subvol_fd(int fd) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ /* On btrfs subvolumes always have the inode 256 */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
+ return 0;
+
+ return btrfs_is_filesystem(fd);
+}
+
+int btrfs_is_subvol(const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ assert(path);
+
+ fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_is_subvol_fd(fd);
+}
+
+int btrfs_subvol_make_fd(int fd, const char *subvolume) {
+ struct btrfs_ioctl_vol_args args = {};
+ _cleanup_close_ int real_fd = -1;
+ int r;
+
+ assert(subvolume);
+
+ r = validate_subvolume_name(subvolume);
+ if (r < 0)
+ return r;
+
+ r = fcntl(fd, F_GETFL);
+ if (r < 0)
+ return -errno;
+ if (FLAGS_SET(r, O_PATH)) {
+ /* An O_PATH fd was specified, let's convert here to a proper one, as btrfs ioctl's can't deal with
+ * O_PATH. */
+
+ real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (real_fd < 0)
+ return real_fd;
+
+ fd = real_fd;
+ }
+
+ strncpy(args.name, subvolume, sizeof(args.name)-1);
+
+ if (ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_subvol_make(const char *path) {
+ _cleanup_close_ int fd = -1;
+ const char *subvolume;
+ int r;
+
+ assert(path);
+
+ r = extract_subvolume_name(path, &subvolume);
+ if (r < 0)
+ return r;
+
+ fd = open_parent(path, O_CLOEXEC, 0);
+ if (fd < 0)
+ return fd;
+
+ return btrfs_subvol_make_fd(fd, subvolume);
+}
+
+int btrfs_subvol_set_read_only_fd(int fd, bool b) {
+ uint64_t flags, nflags;
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
+ return -EINVAL;
+
+ if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
+ return -errno;
+
+ if (b)
+ nflags = flags | BTRFS_SUBVOL_RDONLY;
+ else
+ nflags = flags & ~BTRFS_SUBVOL_RDONLY;
+
+ if (flags == nflags)
+ return 0;
+
+ if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_subvol_set_read_only(const char *path, bool b) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_subvol_set_read_only_fd(fd, b);
+}
+
+int btrfs_subvol_get_read_only_fd(int fd) {
+ uint64_t flags;
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
+ return -EINVAL;
+
+ if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
+ return -errno;
+
+ return !!(flags & BTRFS_SUBVOL_RDONLY);
+}
+
+int btrfs_reflink(int infd, int outfd) {
+ int r;
+
+ assert(infd >= 0);
+ assert(outfd >= 0);
+
+ /* Make sure we invoke the ioctl on a regular file, so that no device driver accidentally gets it. */
+
+ r = fd_verify_regular(outfd);
+ if (r < 0)
+ return r;
+
+ if (ioctl(outfd, BTRFS_IOC_CLONE, infd) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
+ struct btrfs_ioctl_clone_range_args args = {
+ .src_fd = infd,
+ .src_offset = in_offset,
+ .src_length = sz,
+ .dest_offset = out_offset,
+ };
+ int r;
+
+ assert(infd >= 0);
+ assert(outfd >= 0);
+ assert(sz > 0);
+
+ r = fd_verify_regular(outfd);
+ if (r < 0)
+ return r;
+
+ if (ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &args) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_get_block_device_fd(int fd, dev_t *dev) {
+ struct btrfs_ioctl_fs_info_args fsi = {};
+ uint64_t id;
+ int r;
+
+ assert(fd >= 0);
+ assert(dev);
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+
+ if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
+ return -errno;
+
+ /* We won't do this for btrfs RAID */
+ if (fsi.num_devices != 1) {
+ *dev = 0;
+ return 0;
+ }
+
+ for (id = 1; id <= fsi.max_id; id++) {
+ struct btrfs_ioctl_dev_info_args di = {
+ .devid = id,
+ };
+ struct stat st;
+
+ if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
+ if (errno == ENODEV)
+ continue;
+
+ return -errno;
+ }
+
+ if (stat((char*) di.path, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENODEV;
+
+ if (major(st.st_rdev) == 0)
+ return -ENODEV;
+
+ *dev = st.st_rdev;
+ return 1;
+ }
+
+ return -ENODEV;
+}
+
+int btrfs_get_block_device(const char *path, dev_t *dev) {
+ _cleanup_close_ int fd = -1;
+
+ assert(path);
+ assert(dev);
+
+ fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_get_block_device_fd(fd, dev);
+}
+
+int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
+ struct btrfs_ioctl_ino_lookup_args args = {
+ .objectid = BTRFS_FIRST_FREE_OBJECTID
+ };
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+
+ if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0)
+ return -errno;
+
+ *ret = args.treeid;
+ return 0;
+}
+
+int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
+ _cleanup_close_ int subvol_fd = -1;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (subvol_fd < 0)
+ return -errno;
+
+ return btrfs_subvol_get_id_fd(subvol_fd, ret);
+}
+
+static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
+ assert(args);
+
+ /* the objectid, type, offset together make up the btrfs key,
+ * which is considered a single 136bit integer when
+ * comparing. This call increases the counter by one, dealing
+ * with the overflow between the fields */
+
+ if (args->key.min_offset < (uint64_t) -1) {
+ args->key.min_offset++;
+ return true;
+ }
+
+ if (args->key.min_type < (uint8_t) -1) {
+ args->key.min_type++;
+ args->key.min_offset = 0;
+ return true;
+ }
+
+ if (args->key.min_objectid < (uint64_t) -1) {
+ args->key.min_objectid++;
+ args->key.min_offset = 0;
+ args->key.min_type = 0;
+ return true;
+ }
+
+ return false;
+}
+
+static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
+ assert(args);
+ assert(h);
+
+ args->key.min_objectid = h->objectid;
+ args->key.min_type = h->type;
+ args->key.min_offset = h->offset;
+}
+
+static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
+ int r;
+
+ assert(args);
+
+ /* Compare min and max */
+
+ r = CMP(args->key.min_objectid, args->key.max_objectid);
+ if (r != 0)
+ return r;
+
+ r = CMP(args->key.min_type, args->key.max_type);
+ if (r != 0)
+ return r;
+
+ return CMP(args->key.min_offset, args->key.max_offset);
+}
+
+#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) \
+ for ((i) = 0, \
+ (sh) = (const struct btrfs_ioctl_search_header*) (args).buf; \
+ (i) < (args).key.nr_items; \
+ (i)++, \
+ (sh) = (const struct btrfs_ioctl_search_header*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header) + (sh)->len))
+
+#define BTRFS_IOCTL_SEARCH_HEADER_BODY(sh) \
+ ((void*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header)))
+
+int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
+ struct btrfs_ioctl_search_args args = {
+ /* Tree of tree roots */
+ .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
+
+ /* Look precisely for the subvolume items */
+ .key.min_type = BTRFS_ROOT_ITEM_KEY,
+ .key.max_type = BTRFS_ROOT_ITEM_KEY,
+
+ .key.min_offset = 0,
+ .key.max_offset = (uint64_t) -1,
+
+ /* No restrictions on the other components */
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ bool found = false;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (subvol_id == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &subvol_id);
+ if (r < 0)
+ return r;
+ } else {
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+ }
+
+ args.key.min_objectid = args.key.max_objectid = subvol_id;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
+ return -errno;
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+
+ const struct btrfs_root_item *ri;
+
+ /* Make sure we start the next search at least from this entry */
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->objectid != subvol_id)
+ continue;
+ if (sh->type != BTRFS_ROOT_ITEM_KEY)
+ continue;
+
+ /* Older versions of the struct lacked the otime setting */
+ if (sh->len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
+ continue;
+
+ ri = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+
+ ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
+ (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
+
+ ret->subvol_id = subvol_id;
+ ret->read_only = le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY;
+
+ assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
+ memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
+ memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
+
+ found = true;
+ goto finish;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+finish:
+ if (!found)
+ return -ENODATA;
+
+ return 0;
+}
+
+int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
+
+ struct btrfs_ioctl_search_args args = {
+ /* Tree of quota items */
+ .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
+
+ /* The object ID is always 0 */
+ .key.min_objectid = 0,
+ .key.max_objectid = 0,
+
+ /* Look precisely for the quota items */
+ .key.min_type = BTRFS_QGROUP_STATUS_KEY,
+ .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
+
+ /* No restrictions on the other components */
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ bool found_info = false, found_limit = false;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (qgroupid == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &qgroupid);
+ if (r < 0)
+ return r;
+ } else {
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+ }
+
+ args.key.min_offset = args.key.max_offset = qgroupid;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
+ if (errno == ENOENT) /* quota tree is missing: quota disabled */
+ break;
+
+ return -errno;
+ }
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+
+ /* Make sure we start the next search at least from this entry */
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->objectid != 0)
+ continue;
+ if (sh->offset != qgroupid)
+ continue;
+
+ if (sh->type == BTRFS_QGROUP_INFO_KEY) {
+ const struct btrfs_qgroup_info_item *qii = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+
+ ret->referenced = le64toh(qii->rfer);
+ ret->exclusive = le64toh(qii->excl);
+
+ found_info = true;
+
+ } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) {
+ const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+
+ if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
+ ret->referenced_max = le64toh(qli->max_rfer);
+ else
+ ret->referenced_max = (uint64_t) -1;
+
+ if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
+ ret->exclusive_max = le64toh(qli->max_excl);
+ else
+ ret->exclusive_max = (uint64_t) -1;
+
+ found_limit = true;
+ }
+
+ if (found_info && found_limit)
+ goto finish;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+finish:
+ if (!found_limit && !found_info)
+ return -ENODATA;
+
+ if (!found_info) {
+ ret->referenced = (uint64_t) -1;
+ ret->exclusive = (uint64_t) -1;
+ }
+
+ if (!found_limit) {
+ ret->referenced_max = (uint64_t) -1;
+ ret->exclusive_max = (uint64_t) -1;
+ }
+
+ return 0;
+}
+
+int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
+}
+
+int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
+ uint64_t level, lowest = (uint64_t) -1, lowest_qgroupid = 0;
+ _cleanup_free_ uint64_t *qgroups = NULL;
+ int r, n, i;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ /* This finds the "subtree" qgroup for a specific
+ * subvolume. This only works for subvolumes that have been
+ * prepared with btrfs_subvol_auto_qgroup_fd() with
+ * insert_intermediary_qgroup=true (or equivalent). For others
+ * it will return the leaf qgroup instead. The two cases may
+ * be distinguished via the return value, which is 1 in case
+ * an appropriate "subtree" qgroup was found, and 0
+ * otherwise. */
+
+ if (subvol_id == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &subvol_id);
+ if (r < 0)
+ return r;
+ }
+
+ r = btrfs_qgroupid_split(subvol_id, &level, NULL);
+ if (r < 0)
+ return r;
+ if (level != 0) /* Input must be a leaf qgroup */
+ return -EINVAL;
+
+ n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
+ if (n < 0)
+ return n;
+
+ for (i = 0; i < n; i++) {
+ uint64_t id;
+
+ r = btrfs_qgroupid_split(qgroups[i], &level, &id);
+ if (r < 0)
+ return r;
+
+ if (id != subvol_id)
+ continue;
+
+ if (lowest == (uint64_t) -1 || level < lowest) {
+ lowest_qgroupid = qgroups[i];
+ lowest = level;
+ }
+ }
+
+ if (lowest == (uint64_t) -1) {
+ /* No suitable higher-level qgroup found, let's return
+ * the leaf qgroup instead, and indicate that with the
+ * return value. */
+
+ *ret = subvol_id;
+ return 0;
+ }
+
+ *ret = lowest_qgroupid;
+ return 1;
+}
+
+int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
+ uint64_t qgroupid;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ /* This determines the quota data of the qgroup with the
+ * lowest level, that shares the id part with the specified
+ * subvolume. This is useful for determining the quota data
+ * for entire subvolume subtrees, as long as the subtrees have
+ * been set up with btrfs_subvol_auto_qgroup_fd() or in a
+ * compatible way */
+
+ r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
+ if (r < 0)
+ return r;
+
+ return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
+}
+
+int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
+}
+
+int btrfs_defrag_fd(int fd) {
+ int r;
+
+ assert(fd >= 0);
+
+ r = fd_verify_regular(fd);
+ if (r < 0)
+ return r;
+
+ if (ioctl(fd, BTRFS_IOC_DEFRAG, NULL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_defrag(const char *p) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_defrag_fd(fd);
+}
+
+int btrfs_quota_enable_fd(int fd, bool b) {
+ struct btrfs_ioctl_quota_ctl_args args = {
+ .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
+ };
+ int r;
+
+ assert(fd >= 0);
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+
+ if (ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_quota_enable(const char *path, bool b) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_quota_enable_fd(fd, b);
+}
+
+int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {
+
+ struct btrfs_ioctl_qgroup_limit_args args = {
+ .lim.max_rfer = referenced_max,
+ .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
+ };
+ unsigned c;
+ int r;
+
+ assert(fd >= 0);
+
+ if (qgroupid == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &qgroupid);
+ if (r < 0)
+ return r;
+ } else {
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+ }
+
+ args.qgroupid = qgroupid;
+
+ for (c = 0;; c++) {
+ if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) {
+
+ if (errno == EBUSY && c < 10) {
+ (void) btrfs_quota_scan_wait(fd);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
+}
+
+int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
+ uint64_t qgroupid;
+ int r;
+
+ assert(fd >= 0);
+
+ r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
+ if (r < 0)
+ return r;
+
+ return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
+}
+
+int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
+}
+
+int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
+ assert(ret);
+
+ if (level >= (UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT)))
+ return -EINVAL;
+
+ if (id >= (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT))
+ return -EINVAL;
+
+ *ret = (level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
+ return 0;
+}
+
+/* Inverse of btrfs_qgroupid_make(): takes a 64 bit qgroup id apart.
+ *
+ * *level receives the bits from BTRFS_QGROUP_LEVEL_SHIFT upwards, *id the bits below. Either output
+ * pointer may be NULL if the caller is not interested in that part, but not both. Always returns 0. */
+int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
+        const uint64_t id_mask = (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1;
+
+        assert(level || id);
+
+        if (level)
+                *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
+        if (id)
+                *id = qgroupid & id_mask;
+
+        return 0;
+}
+
+/* Shared implementation of btrfs_qgroup_create() and btrfs_qgroup_destroy().
+ *
+ * Issues BTRFS_IOC_QGROUP_CREATE for 'qgroupid' on 'fd', with the ioctl's 'create' field set to 'b'.
+ *
+ * Returns 0 on success, -ENOTTY if 'fd' does not refer to a btrfs file system, -ENOPROTOOPT if the
+ * ioctl fails with EINVAL, or another negative errno-style value. As long as the ioctl fails with EBUSY
+ * we wait for the quota rescan and retry, up to ten times. */
+static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {
+        struct btrfs_ioctl_qgroup_create_args args = {
+                .qgroupid = qgroupid,
+                .create = b,
+        };
+        unsigned attempt;
+        int r;
+
+        r = btrfs_is_filesystem(fd);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return -ENOTTY;
+
+        for (attempt = 0;; attempt++) {
+                if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) >= 0)
+                        return 0;
+
+                /* If quota is not enabled, we get EINVAL. Turn this into a recognizable error */
+                if (errno == EINVAL)
+                        return -ENOPROTOOPT;
+
+                /* Anything but "busy" is final, and so is "busy" once the retries are used up. */
+                if (errno != EBUSY || attempt >= 10)
+                        return -errno;
+
+                (void) btrfs_quota_scan_wait(fd);
+        }
+}
+
+/* Creates the qgroup 'qgroupid' on the btrfs file system 'fd' refers to. Thin wrapper around
+ * qgroup_create_or_destroy(); see there for the return values. */
+int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
+        const bool create = true;
+
+        return qgroup_create_or_destroy(fd, create, qgroupid);
+}
+
+/* Removes the qgroup 'qgroupid' from the btrfs file system 'fd' refers to. Thin wrapper around
+ * qgroup_create_or_destroy(); see there for the return values. */
+int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
+        const bool create = false;
+
+        return qgroup_create_or_destroy(fd, create, qgroupid);
+}
+
+/* Destroys the qgroup 'qgroupid' after detaching it from every qgroup it is assigned to.
+ *
+ * Parent qgroups whose id part equals the id part of 'qgroupid' (i.e. that only differ in level) are
+ * destroyed recursively as well; failures of those nested destructions are ignored. A failure to
+ * detach from a parent aborts the operation.
+ *
+ * Returns the result of the final btrfs_qgroup_destroy(), or a negative errno-style value if looking up
+ * or detaching the parents failed. */
+int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
+        _cleanup_free_ uint64_t *parents = NULL;
+        uint64_t own_id;
+        int n_parents, k, r;
+
+        r = btrfs_qgroupid_split(qgroupid, NULL, &own_id);
+        if (r < 0)
+                return r;
+
+        n_parents = btrfs_qgroup_find_parents(fd, qgroupid, &parents);
+        if (n_parents < 0)
+                return n_parents;
+
+        for (k = 0; k < n_parents; k++) {
+                uint64_t parent_id;
+
+                r = btrfs_qgroupid_split(parents[k], NULL, &parent_id);
+                if (r < 0)
+                        return r;
+
+                r = btrfs_qgroup_unassign(fd, qgroupid, parents[k]);
+                if (r < 0)
+                        return r;
+
+                /* A parent that shares our id part is one of "our" higher-level qgroups: take it
+                 * down too. */
+                if (parent_id == own_id)
+                        (void) btrfs_qgroup_destroy_recursive(fd, parents[k]);
+        }
+
+        return btrfs_qgroup_destroy(fd, qgroupid);
+}
+
+/* Requests a quota rescan on the btrfs file system 'fd' refers to, by issuing BTRFS_IOC_QUOTA_RESCAN
+ * with zeroed arguments. Returns 0 on success, -errno otherwise. */
+int btrfs_quota_scan_start(int fd) {
+        struct btrfs_ioctl_quota_rescan_args rescan = {};
+
+        assert(fd >= 0);
+
+        return ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &rescan) < 0 ? -errno : 0;
+}
+
+/* Issues BTRFS_IOC_QUOTA_RESCAN_WAIT on 'fd'. Returns 0 on success, -errno otherwise. */
+int btrfs_quota_scan_wait(int fd) {
+        assert(fd >= 0);
+
+        return ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT) < 0 ? -errno : 0;
+}
+
+/* Queries the quota rescan status of the btrfs file system 'fd' refers to.
+ *
+ * Returns 1 if the 'flags' field reported by BTRFS_IOC_QUOTA_RESCAN_STATUS is non-zero, 0 if it is
+ * zero, and -errno if the ioctl fails. */
+int btrfs_quota_scan_ongoing(int fd) {
+        struct btrfs_ioctl_quota_rescan_args status = {};
+
+        assert(fd >= 0);
+
+        if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &status) < 0)
+                return -errno;
+
+        return status.flags != 0;
+}
+
+/* Shared implementation of btrfs_qgroup_assign() and btrfs_qgroup_unassign().
+ *
+ * Issues BTRFS_IOC_QGROUP_ASSIGN with src='child', dst='parent' and the 'assign' field set to 'b'.
+ * While the ioctl fails with EBUSY we wait for the quota rescan and retry, up to ten times.
+ *
+ * Returns 0 if the ioctl returned 0, 1 if it returned a positive value (in which case a rescan is
+ * requested, ignoring failures of that request), -ENOTTY if 'fd' is not on btrfs, or another negative
+ * errno-style value. */
+static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
+        struct btrfs_ioctl_qgroup_assign_args args = {
+                .src = child,
+                .dst = parent,
+                .assign = b,
+        };
+        unsigned attempt;
+        int r;
+
+        r = btrfs_is_filesystem(fd);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return -ENOTTY;
+
+        for (attempt = 0;; attempt++) {
+                r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args);
+                if (r >= 0)
+                        break;
+
+                if (errno != EBUSY || attempt >= 10)
+                        return -errno;
+
+                (void) btrfs_quota_scan_wait(fd);
+        }
+
+        if (r == 0)
+                return 0;
+
+        /* If the return value is > 0, we need to request a rescan */
+        (void) btrfs_quota_scan_start(fd);
+        return 1;
+}
+
+/* Makes qgroup 'child' a member of qgroup 'parent'. Thin wrapper around qgroup_assign_or_unassign();
+ * see there for the return values. */
+int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
+        const bool assign = true;
+
+        return qgroup_assign_or_unassign(fd, assign, child, parent);
+}
+
+/* Removes qgroup 'child' from qgroup 'parent'. Thin wrapper around qgroup_assign_or_unassign(); see
+ * there for the return values. */
+int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
+        const bool assign = false;
+
+        return qgroup_assign_or_unassign(fd, assign, child, parent);
+}
+
+/* Removes the btrfs subvolume named 'subvolume' that lives in the directory 'fd'.
+ *
+ * fd        - directory containing the subvolume; must be >= 0 and refer to a directory
+ * subvolume - name of the subvolume, relative to 'fd'
+ * subvol_id - id of the subvolume if already known, or 0 to have it looked up
+ * flags     - if BTRFS_REMOVE_RECURSIVE is set and the subvolume cannot be destroyed because it is not
+ *             empty (ENOTEMPTY), all subvolumes nested below it are removed first (recursively, through
+ *             this very function), and then destruction is attempted once more
+ *
+ * After a successful BTRFS_IOC_SNAP_DESTROY the matching qgroup is dropped too, on a best-effort basis.
+ *
+ * Returns 0 on success, -EINVAL if 'fd' is not a directory, -ENOTTY if 'subvolume' is not a btrfs
+ * subvolume, -ENOMEM on allocation failure, or another negative errno-style value. */
+static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
+        struct btrfs_ioctl_search_args args = {
+                .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
+
+                .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
+                .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
+
+                .key.min_type = BTRFS_ROOT_BACKREF_KEY,
+                .key.max_type = BTRFS_ROOT_BACKREF_KEY,
+
+                .key.min_transid = 0,
+                .key.max_transid = (uint64_t) -1,
+        };
+
+        struct btrfs_ioctl_vol_args vol_args = {};
+        _cleanup_close_ int subvol_fd = -1;
+        struct stat st;
+        bool made_writable = false;
+        int r;
+
+        assert(fd >= 0);
+        assert(subvolume);
+
+        if (fstat(fd, &st) < 0)
+                return -errno;
+
+        if (!S_ISDIR(st.st_mode))
+                return -EINVAL;
+
+        subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+        if (subvol_fd < 0)
+                return -errno;
+
+        /* Let's check if this is actually a subvolume. Note that this is mostly redundant, as BTRFS_IOC_SNAP_DESTROY
+         * would fail anyway if it is not. However, it's a good thing to check this ahead of time so that we can return
+         * ENOTTY unconditionally in this case. This is different from the ioctl() which will return EPERM/EACCES if we
+         * don't have the privileges to remove subvolumes, regardless if the specified directory is actually a
+         * subvolume or not. In order to make it easy for callers to cover the "this is not a btrfs subvolume" case
+         * let's prefer ENOTTY over EPERM/EACCES though. */
+        r = btrfs_is_subvol_fd(subvol_fd);
+        if (r < 0)
+                return r;
+        if (r == 0) /* Not a btrfs subvolume */
+                return -ENOTTY;
+
+        if (subvol_id == 0) {
+                r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
+                if (r < 0)
+                        return r;
+        }
+
+        /* First, try to remove the subvolume. If it happens to be
+         * already empty, this will just work. */
+        strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
+        if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
+                (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
+                return 0;
+        }
+        if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
+                return -errno;
+
+        /* OK, the subvolume is not empty, let's look for child
+         * subvolumes, and remove them, first */
+
+        args.key.min_offset = args.key.max_offset = subvol_id;
+
+        while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+                const struct btrfs_ioctl_search_header *sh;
+                unsigned i;
+
+                args.key.nr_items = 256;
+                if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
+                        return -errno;
+
+                if (args.key.nr_items <= 0)
+                        break;
+
+                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+                        _cleanup_free_ char *p = NULL;
+                        const struct btrfs_root_ref *ref;
+                        struct btrfs_ioctl_ino_lookup_args ino_args;
+
+                        /* Make sure we start the next search at least from this entry */
+                        btrfs_ioctl_search_args_set(&args, sh);
+
+                        if (sh->type != BTRFS_ROOT_BACKREF_KEY)
+                                continue;
+                        if (sh->offset != subvol_id)
+                                continue;
+
+                        ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+
+                        /* The name follows the fixed-size root ref. name_len is a 16 bit little-endian
+                         * field, hence convert it with le16toh(): le64toh() only happens to work on
+                         * little-endian hosts and yields a bogus, huge length on big-endian ones. */
+                        p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le16toh(ref->name_len));
+                        if (!p)
+                                return -ENOMEM;
+
+                        zero(ino_args);
+                        ino_args.treeid = subvol_id;
+                        /* dirid is stored little-endian in the item, convert it to host order */
+                        ino_args.objectid = le64toh(ref->dirid);
+
+                        if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
+                                return -errno;
+
+                        /* Clear the read-only flag of the subvolume once, before the first child is
+                         * removed from it. */
+                        if (!made_writable) {
+                                r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
+                                if (r < 0)
+                                        return r;
+
+                                made_writable = true;
+                        }
+
+                        if (isempty(ino_args.name))
+                                /* Subvolume is in the top-level
+                                 * directory of the subvolume. */
+                                r = subvol_remove_children(subvol_fd, p, sh->objectid, flags);
+                        else {
+                                _cleanup_close_ int child_fd = -1;
+
+                                /* Subvolume is somewhere further down,
+                                 * hence we need to open the
+                                 * containing directory first */
+
+                                child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+                                if (child_fd < 0)
+                                        return -errno;
+
+                                r = subvol_remove_children(child_fd, p, sh->objectid, flags);
+                        }
+                        if (r < 0)
+                                return r;
+                }
+
+                /* Increase search key by one, to read the next item, if we can. */
+                if (!btrfs_ioctl_search_args_inc(&args))
+                        break;
+        }
+
+        /* OK, the child subvolumes should all be gone now, let's try
+         * again to remove the subvolume */
+        if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
+                return -errno;
+
+        (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
+        return 0;
+}
+
+/* Removes the btrfs subvolume at 'path'.
+ *
+ * The last path component is taken as the subvolume name (extract_subvolume_name()), the directory
+ * containing it is opened with open_parent(), and the actual work is delegated to
+ * subvol_remove_children() with an unknown (0) subvolume id.
+ *
+ * Returns a negative errno-style value if the name cannot be extracted or the parent cannot be opened,
+ * otherwise the result of subvol_remove_children(). */
+int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags) {
+        _cleanup_close_ int parent_fd = -1;
+        const char *name;
+        int k;
+
+        assert(path);
+
+        k = extract_subvolume_name(path, &name);
+        if (k < 0)
+                return k;
+
+        parent_fd = open_parent(path, O_CLOEXEC, 0);
+        if (parent_fd < 0)
+                return parent_fd;
+
+        return subvol_remove_children(parent_fd, name, /* subvol_id= */ 0, flags);
+}
+
+/* Removes the subvolume 'subvolume' located in the directory 'fd'. The subvolume id is passed to
+ * subvol_remove_children() as 0, i.e. it is looked up there. */
+int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags) {
+        const uint64_t unknown_id = 0;
+
+        return subvol_remove_children(fd, subvolume, unknown_id, flags);
+}
+
+/* Copies the limits configured for qgroup 'old_qgroupid' over to qgroup 'new_qgroupid'.
+ *
+ * The quota tree is searched for the BTRFS_QGROUP_LIMIT_KEY item of the old qgroup; if one is found its
+ * four limit values and the corresponding four flag bits are applied to the new qgroup with
+ * BTRFS_IOC_QGROUP_LIMIT (retrying up to ten times while that ioctl reports EBUSY).
+ *
+ * Returns 1 if limits were copied, 0 if there was nothing to copy (no limit item, or no quota tree at
+ * all), -ENOTTY if 'fd' is not on btrfs, or another negative errno-style value. */
+int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {
+
+        struct btrfs_ioctl_search_args args = {
+                /* Search the quota tree ... */
+                .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
+
+                /* ... for object ID 0 (it is always 0 for these items) ... */
+                .key.min_objectid = 0,
+                .key.max_objectid = 0,
+
+                /* ... of the limit item type ... */
+                .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
+                .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
+
+                /* ... belonging to the source qgroup ... */
+                .key.min_offset = old_qgroupid,
+                .key.max_offset = old_qgroupid,
+
+                /* ... from any transaction. */
+                .key.min_transid = 0,
+                .key.max_transid = (uint64_t) -1,
+        };
+
+        int r;
+
+        r = btrfs_is_filesystem(fd);
+        if (r < 0)
+                return r;
+        if (!r)
+                return -ENOTTY;
+
+        while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+                const struct btrfs_ioctl_search_header *sh;
+                unsigned i;
+
+                args.key.nr_items = 256;
+                if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
+                        if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
+                                break;
+
+                        return -errno;
+                }
+
+                if (args.key.nr_items <= 0)
+                        break;
+
+                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+                        const struct btrfs_qgroup_limit_item *item = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+                        struct btrfs_ioctl_qgroup_limit_args limit_args;
+                        unsigned attempt;
+
+                        /* Make sure we start the next search at least from this entry */
+                        btrfs_ioctl_search_args_set(&args, sh);
+
+                        if (sh->objectid != 0 ||
+                            sh->type != BTRFS_QGROUP_LIMIT_KEY ||
+                            sh->offset != old_qgroupid)
+                                continue;
+
+                        /* This is the item we were looking for: transfer its contents. */
+
+                        limit_args = (struct btrfs_ioctl_qgroup_limit_args) {
+                                .qgroupid = new_qgroupid,
+
+                                .lim.max_rfer = le64toh(item->max_rfer),
+                                .lim.max_excl = le64toh(item->max_excl),
+                                .lim.rsv_rfer = le64toh(item->rsv_rfer),
+                                .lim.rsv_excl = le64toh(item->rsv_excl),
+
+                                .lim.flags = le64toh(item->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
+                                                                     BTRFS_QGROUP_LIMIT_MAX_EXCL|
+                                                                     BTRFS_QGROUP_LIMIT_RSV_RFER|
+                                                                     BTRFS_QGROUP_LIMIT_RSV_EXCL),
+                        };
+
+                        for (attempt = 0;; attempt++) {
+                                if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &limit_args) >= 0)
+                                        return 1;
+
+                                if (errno != EBUSY || attempt >= 10)
+                                        return -errno;
+
+                                (void) btrfs_quota_scan_wait(fd);
+                        }
+                }
+
+                /* Increase search key by one, to read the next item, if we can. */
+                if (!btrfs_ioctl_search_args_inc(&args))
+                        break;
+        }
+
+        return 0;
+}
+
+/* Sets up a reduced form of the old subvolume's qgroup memberships for the new subvolume.
+ *
+ * Two properties of the old subvolume are detected:
+ *
+ *  - it was a member of a qgroup with the same id part as itself (i.e. one that differs only in level):
+ *    then the new subvolume gets an intermediary qgroup of its own;
+ *  - it shared at least one parent qgroup with its parent subvolume: then the new subvolume is hooked
+ *    up like its parent.
+ *
+ * If either holds, btrfs_subvol_auto_qgroup_fd() is invoked for the new subvolume and its result is
+ * returned. Returns 0 if there is nothing to copy, negative errno-style values on failure. */
+static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
+        _cleanup_free_ uint64_t *own_qgroups = NULL, *parent_qgroups = NULL;
+        bool shares_parent_qgroup = false, has_same_id_qgroup = false;
+        int n_own, n_parent = 0, r, a;
+        uint64_t parent_subvol;
+
+        assert(fd >= 0);
+
+        n_own = btrfs_qgroup_find_parents(fd, old_subvol_id, &own_qgroups);
+        if (n_own <= 0) /* Nothing to copy */
+                return n_own;
+
+        r = btrfs_subvol_get_parent(fd, old_subvol_id, &parent_subvol);
+        if (r >= 0) {
+                n_parent = btrfs_qgroup_find_parents(fd, parent_subvol, &parent_qgroups);
+                if (n_parent < 0)
+                        return n_parent;
+        } else if (r != -ENXIO)
+                return r;
+        /* else: -ENXIO, we have no parent subvolume, hence no parent qgroups to compare with */
+
+        for (a = 0; a < n_own; a++) {
+                uint64_t id;
+                int b;
+
+                r = btrfs_qgroupid_split(own_qgroups[a], NULL, &id);
+                if (r < 0)
+                        return r;
+
+                if (id == old_subvol_id) {
+                        /* The old subvolume was a member of a qgroup that had the same id, but a
+                         * different level than itself. Let's set up something similar in the
+                         * destination. */
+                        has_same_id_qgroup = true;
+                        break;
+                }
+
+                for (b = 0; b < n_parent; b++) {
+                        if (parent_qgroups[b] != own_qgroups[a])
+                                continue;
+
+                        /* The old subvolume shared a common parent qgroup with its parent
+                         * subvolume. Let's set up something similar in the destination. */
+                        shares_parent_qgroup = true;
+                }
+        }
+
+        if (!has_same_id_qgroup && !shares_parent_qgroup)
+                return 0;
+
+        return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, has_same_id_qgroup);
+}
+
+/* Copies quota limits from subvolume 'old_subvol' to subvolume 'new_subvol': first the limits of the
+ * leaf qgroup (whose id equals the subvolume id), then, if both subvolumes have a subtree qgroup, the
+ * limits of that one.
+ *
+ * Returns a negative errno-style value on failure. Otherwise returns 1 if the subtree limits were
+ * copied; if they were not, returns 1 or 0 depending on whether the leaf limits were copied. */
+static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
+        uint64_t src_qgroup, dst_qgroup;
+        int leaf_copied, r;
+
+        /* Leaf limits first */
+        r = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
+        if (r < 0)
+                return r;
+        leaf_copied = r > 0;
+
+        /* Subtree limits next, provided both sides have a subtree qgroup */
+        r = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &src_qgroup);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return leaf_copied;
+
+        r = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &dst_qgroup);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return leaf_copied;
+
+        r = btrfs_qgroup_copy_limits(fd, src_qgroup, dst_qgroup);
+
+        return r != 0 ? r : leaf_copied;
+}
+
+/* Snapshots the subvolume 'old_fd' refers to into a new subvolume named 'subvolume' inside the
+ * directory 'new_fd'.
+ *
+ * old_fd        - the source subvolume
+ * new_fd        - the directory the snapshot is created in
+ * subvolume     - name of the snapshot, relative to 'new_fd'
+ * old_subvol_id - id of the source subvolume if already known, or 0 to have it looked up
+ * flags         - BTRFS_SNAPSHOT_READ_ONLY: create the snapshot read-only;
+ *                 BTRFS_SNAPSHOT_QUOTA: copy the qgroup hierarchy and limits over (best effort, errors
+ *                 are ignored);
+ *                 BTRFS_SNAPSHOT_RECURSIVE: also snapshot the subvolumes nested below the source, by
+ *                 replacing the empty directories that stand in for them in the snapshot
+ *
+ * Returns 0 on success, or a negative errno-style value. */
+static int subvol_snapshot_children(
+                int old_fd,
+                int new_fd,
+                const char *subvolume,
+                uint64_t old_subvol_id,
+                BtrfsSnapshotFlags flags) {
+
+        struct btrfs_ioctl_search_args args = {
+                .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
+
+                .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
+                .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
+
+                .key.min_type = BTRFS_ROOT_BACKREF_KEY,
+                .key.max_type = BTRFS_ROOT_BACKREF_KEY,
+
+                .key.min_transid = 0,
+                .key.max_transid = (uint64_t) -1,
+        };
+
+        struct btrfs_ioctl_vol_args_v2 vol_args = {
+                .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
+                .fd = old_fd,
+        };
+        _cleanup_close_ int subvolume_fd = -1;
+        uint64_t new_subvol_id;
+        int r;
+
+        assert(old_fd >= 0);
+        assert(new_fd >= 0);
+        assert(subvolume);
+
+        strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
+
+        if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
+                return -errno;
+
+        /* Without recursion and quota handling the plain snapshot is all there is to do. */
+        if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
+            !(flags & BTRFS_SNAPSHOT_QUOTA))
+                return 0;
+
+        if (old_subvol_id == 0) {
+                r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
+                if (r < 0)
+                        return r;
+        }
+
+        r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
+        if (r < 0)
+                return r;
+
+        if (flags & BTRFS_SNAPSHOT_QUOTA)
+                (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);
+
+        if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
+
+                if (flags & BTRFS_SNAPSHOT_QUOTA)
+                        (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
+
+                return 0;
+        }
+
+        args.key.min_offset = args.key.max_offset = old_subvol_id;
+
+        while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+                const struct btrfs_ioctl_search_header *sh;
+                unsigned i;
+
+                args.key.nr_items = 256;
+                if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
+                        return -errno;
+
+                if (args.key.nr_items <= 0)
+                        break;
+
+                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+                        _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
+                        struct btrfs_ioctl_ino_lookup_args ino_args;
+                        const struct btrfs_root_ref *ref;
+                        _cleanup_close_ int old_child_fd = -1, new_child_fd = -1;
+
+                        /* Make sure we start the next search at least from this entry */
+                        btrfs_ioctl_search_args_set(&args, sh);
+
+                        if (sh->type != BTRFS_ROOT_BACKREF_KEY)
+                                continue;
+
+                        /* Avoid finding the source subvolume a second
+                         * time */
+                        if (sh->offset != old_subvol_id)
+                                continue;
+
+                        /* Avoid running into loops if the new
+                         * subvolume is below the old one. */
+                        if (sh->objectid == new_subvol_id)
+                                continue;
+
+                        ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+
+                        /* The name follows the fixed-size root ref. name_len is a 16 bit little-endian
+                         * field, hence convert it with le16toh(): le64toh() only happens to work on
+                         * little-endian hosts and yields a bogus, huge length on big-endian ones. */
+                        p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le16toh(ref->name_len));
+                        if (!p)
+                                return -ENOMEM;
+
+                        zero(ino_args);
+                        ino_args.treeid = old_subvol_id;
+                        /* dirid is stored little-endian in the item, convert it to host order */
+                        ino_args.objectid = le64toh(ref->dirid);
+
+                        if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
+                                return -errno;
+
+                        /* The kernel returns an empty name if the
+                         * subvolume is in the top-level directory,
+                         * and otherwise appends a slash, so that we
+                         * can just concatenate easily here, without
+                         * adding a slash. */
+                        c = strappend(ino_args.name, p);
+                        if (!c)
+                                return -ENOMEM;
+
+                        old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+                        if (old_child_fd < 0)
+                                return -errno;
+
+                        /* The directory inside the new snapshot that is to contain the child */
+                        np = strjoin(subvolume, "/", ino_args.name);
+                        if (!np)
+                                return -ENOMEM;
+
+                        new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+                        if (new_child_fd < 0)
+                                return -errno;
+
+                        if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
+                                /* If the snapshot is read-only we
+                                 * need to mark it writable
+                                 * temporarily, to put the subsnapshot
+                                 * into place. */
+
+                                if (subvolume_fd < 0) {
+                                        subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+                                        if (subvolume_fd < 0)
+                                                return -errno;
+                                }
+
+                                r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
+                                if (r < 0)
+                                        return r;
+                        }
+
+                        /* When btrfs clones the subvolumes, child
+                         * subvolumes appear as empty directories. Remove
+                         * them, so that we can create a new snapshot
+                         * in their place */
+                        if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
+                                int k = -errno;
+
+                                if (flags & BTRFS_SNAPSHOT_READ_ONLY)
+                                        (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
+
+                                return k;
+                        }
+
+                        /* The copy fallback only applies to the top-level source, not to children */
+                        r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh->objectid, flags & ~BTRFS_SNAPSHOT_FALLBACK_COPY);
+
+                        /* Restore the readonly flag */
+                        if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
+                                int k;
+
+                                k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
+                                if (r >= 0 && k < 0)
+                                        return k;
+                        }
+
+                        if (r < 0)
+                                return r;
+                }
+
+                /* Increase search key by one, to read the next item, if we can. */
+                if (!btrfs_ioctl_search_args_inc(&args))
+                        break;
+        }
+
+        if (flags & BTRFS_SNAPSHOT_QUOTA)
+                (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
+
+        return 0;
+}
+
+/* Creates a snapshot of the directory tree 'old_fd' refers to at 'new_path'.
+ *
+ * If 'old_fd' is a btrfs subvolume a real snapshot is taken via subvol_snapshot_children(). Otherwise:
+ *
+ *  - without BTRFS_SNAPSHOT_FALLBACK_COPY the call fails with -EISDIR;
+ *  - with it, a new subvolume is created at 'new_path' (or, if that yields -ENOTTY and
+ *    BTRFS_SNAPSHOT_FALLBACK_DIRECTORY is set, a plain directory with mode 0755) and the source tree
+ *    is copied into it with COPY_MERGE|COPY_REFLINK, reporting progress through 'progress_path',
+ *    'progress_bytes' and 'userdata'. With BTRFS_SNAPSHOT_READ_ONLY the new subvolume is then marked
+ *    read-only; a plain directory instead gets FS_IMMUTABLE_FL set if
+ *    BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE is given (best effort). If copying or marking read-only fails,
+ *    whatever was created at 'new_path' is removed again.
+ *
+ * Returns 0 on success, a negative errno-style value on failure. */
+int btrfs_subvol_snapshot_fd_full(
+                int old_fd,
+                const char *new_path,
+                BtrfsSnapshotFlags flags,
+                copy_progress_path_t progress_path,
+                copy_progress_bytes_t progress_bytes,
+                void *userdata) {
+
+        _cleanup_close_ int new_fd = -1;
+        bool plain_directory = false;
+        const char *subvolume;
+        int r;
+
+        assert(old_fd >= 0);
+        assert(new_path);
+
+        r = btrfs_is_subvol_fd(old_fd);
+        if (r < 0)
+                return r;
+        if (r > 0) {
+                /* The regular case: the source is a subvolume, snapshot it for real. */
+
+                r = extract_subvolume_name(new_path, &subvolume);
+                if (r < 0)
+                        return r;
+
+                new_fd = open_parent(new_path, O_CLOEXEC, 0);
+                if (new_fd < 0)
+                        return new_fd;
+
+                return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
+        }
+
+        /* If the source isn't a proper subvolume, fail unless fallback is requested */
+        if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
+                return -EISDIR;
+
+        r = btrfs_subvol_make(new_path);
+        if (r == -ENOTTY && (flags & BTRFS_SNAPSHOT_FALLBACK_DIRECTORY)) {
+                /* If the destination doesn't support subvolumes, then use a plain directory, if that's requested. */
+                if (mkdir(new_path, 0755) < 0)
+                        return -errno;
+
+                plain_directory = true;
+        } else if (r < 0)
+                return r;
+
+        r = copy_directory_fd_full(old_fd, new_path, COPY_MERGE|COPY_REFLINK, progress_path, progress_bytes, userdata);
+
+        if (r >= 0 && (flags & BTRFS_SNAPSHOT_READ_ONLY)) {
+                if (!plain_directory)
+                        r = btrfs_subvol_set_read_only(new_path, true);
+                else if (flags & BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE)
+                        /* Plain directories have no recursive read-only flag, but something pretty
+                         * close to it: the IMMUTABLE bit. Let's use this here, if this is requested. */
+                        (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);
+        }
+
+        if (r < 0) {
+                /* Don't leave a half-populated destination behind */
+                (void) rm_rf(new_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+                return r;
+        }
+
+        return 0;
+}
+
+/* Path-based front end for btrfs_subvol_snapshot_fd_full(): opens the directory 'old_path' read-only and
+ * snapshots it to 'new_path'. All other arguments are passed through unchanged.
+ *
+ * Returns -errno if 'old_path' cannot be opened as a directory, otherwise the result of the fd-based
+ * variant. */
+int btrfs_subvol_snapshot_full(
+                const char *old_path,
+                const char *new_path,
+                BtrfsSnapshotFlags flags,
+                copy_progress_path_t progress_path,
+                copy_progress_bytes_t progress_bytes,
+                void *userdata) {
+
+        _cleanup_close_ int source_fd = -1;
+
+        assert(old_path);
+        assert(new_path);
+
+        source_fd = open(old_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+
+        return source_fd < 0
+                ? -errno
+                : btrfs_subvol_snapshot_fd_full(source_fd, new_path, flags, progress_path, progress_bytes, userdata);
+}
+
+/* Collects the qgroups that qgroup 'qgroupid' is assigned to.
+ *
+ * If 'qgroupid' is 0 the id of the subvolume 'fd' refers to is used instead. The quota tree is searched
+ * for BTRFS_QGROUP_RELATION_KEY items whose object id is our qgroup; of those, only items whose offset
+ * is not smaller than the object id are taken, and their offsets are collected.
+ *
+ * On success returns the number of entries and stores a newly allocated array in *ret, which the caller
+ * has to free; if there are none (or there is no quota tree), returns 0 and sets *ret to NULL. Returns
+ * -ENOTTY if 'fd' is not on btrfs, -ENOMEM on allocation failure, or another negative errno-style
+ * value. */
+int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {
+
+        struct btrfs_ioctl_search_args args = {
+                /* Search the quota tree ... */
+                .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
+
+                /* ... for relation items only ... */
+                .key.min_type = BTRFS_QGROUP_RELATION_KEY,
+                .key.max_type = BTRFS_QGROUP_RELATION_KEY,
+
+                /* ... with any offset and from any transaction. */
+                .key.min_offset = 0,
+                .key.max_offset = (uint64_t) -1,
+
+                .key.min_transid = 0,
+                .key.max_transid = (uint64_t) -1,
+        };
+
+        _cleanup_free_ uint64_t *parents = NULL;
+        size_t n_parents = 0, n_allocated = 0;
+        int r;
+
+        assert(fd >= 0);
+        assert(ret);
+
+        if (qgroupid != 0) {
+                r = btrfs_is_filesystem(fd);
+                if (r < 0)
+                        return r;
+                if (!r)
+                        return -ENOTTY;
+        } else {
+                r = btrfs_subvol_get_id_fd(fd, &qgroupid);
+                if (r < 0)
+                        return r;
+        }
+
+        args.key.min_objectid = args.key.max_objectid = qgroupid;
+
+        while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+                const struct btrfs_ioctl_search_header *sh;
+                unsigned i;
+
+                args.key.nr_items = 256;
+                if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
+                        if (errno == ENOENT) /* quota tree missing: quota is disabled */
+                                break;
+
+                        return -errno;
+                }
+
+                if (args.key.nr_items <= 0)
+                        break;
+
+                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+
+                        /* Make sure we start the next search at least from this entry */
+                        btrfs_ioctl_search_args_set(&args, sh);
+
+                        if (sh->type != BTRFS_QGROUP_RELATION_KEY ||
+                            sh->offset < sh->objectid ||
+                            sh->objectid != qgroupid)
+                                continue;
+
+                        if (!GREEDY_REALLOC(parents, n_allocated, n_parents+1))
+                                return -ENOMEM;
+
+                        parents[n_parents++] = sh->offset;
+                }
+
+                /* Increase search key by one, to read the next item, if we can. */
+                if (!btrfs_ioctl_search_args_inc(&args))
+                        break;
+        }
+
+        if (n_parents == 0) {
+                *ret = NULL;
+                return 0;
+        }
+
+        *ret = TAKE_PTR(parents);
+
+        return (int) n_parents;
+}
+
+/* Gives the subvolume 'subvol_id' (or, if that is 0, the subvolume 'fd' refers to) a default qgroup
+ * setup derived from its parent subvolume.
+ *
+ * Nothing is done if the subvolume's leaf qgroup already is a member of some qgroup. Otherwise:
+ *
+ *  - insert_intermediary_qgroup == false: the leaf qgroup is assigned to exactly the qgroups the parent
+ *    subvolume's leaf qgroup is assigned to.
+ *
+ *  - insert_intermediary_qgroup == true: a new higher-level qgroup is created that reuses the
+ *    subvolume's id. Its level is one below the lowest level among the parent subvolume's qgroups, or
+ *    255 if the parent is in no qgroup at all. The new qgroup is assigned to all of the parent's
+ *    qgroups, and the subvolume's leaf qgroup is assigned to the new qgroup. If one of these
+ *    assignments fails and the qgroup was created by this call, it is destroyed again.
+ *
+ * Either way the subvolume ends up accounted in the same qgroups as its parent; in the second case it
+ * additionally owns a qgroup that subvolumes created beneath it later on can be hooked into. Invoked
+ * on every newly created subvolume this yields a simple default hierarchy.
+ *
+ * Returns > 0 if anything was changed, 0 if not, -ENOTTY if 'subvol_id' is 0 and 'fd' is not a
+ * subvolume, -EBUSY if there is no level left to insert an intermediary qgroup at, or another negative
+ * errno-style value. */
+int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
+        _cleanup_free_ uint64_t *qgroups = NULL;
+        uint64_t parent_subvol, lowest = 256, new_qgroupid;
+        bool changed = false, created = false;
+        int n = 0, r, i;
+
+        assert(fd >= 0);
+
+        if (subvol_id == 0) {
+                r = btrfs_is_subvol_fd(fd);
+                if (r < 0)
+                        return r;
+                if (!r)
+                        return -ENOTTY;
+
+                r = btrfs_subvol_get_id_fd(fd, &subvol_id);
+                if (r < 0)
+                        return r;
+        }
+
+        n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
+        if (n < 0)
+                return n;
+        if (n > 0) /* already parent qgroups set up, let's bail */
+                return 0;
+
+        qgroups = mfree(qgroups);
+
+        /* From here on 'qgroups' holds the qgroups of the parent subvolume, if there is one. */
+        r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
+        if (r >= 0) {
+                n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
+                if (n < 0)
+                        return n;
+        } else if (r == -ENXIO)
+                /* No parent, hence no qgroup memberships */
+                n = 0;
+        else
+                return r;
+
+        if (!insert_intermediary_qgroup) {
+                /* Assign our subvolume to all the same qgroups as the parent */
+
+                for (i = 0; i < n; i++) {
+                        r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
+                        if (r < 0 && r != -EEXIST)
+                                return r;
+                        if (r >= 0)
+                                changed = true;
+                }
+
+                return changed;
+        }
+
+        /* Determine the lowest qgroup level that the parent subvolume is assigned to. */
+        for (i = 0; i < n; i++) {
+                uint64_t level;
+
+                r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
+                if (r < 0)
+                        return r;
+
+                if (level < lowest)
+                        lowest = level;
+        }
+
+        if (lowest <= 1) /* There are no levels left we could use to insert an intermediary qgroup at */
+                return -EBUSY;
+
+        r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
+        if (r < 0)
+                return r;
+
+        /* Create the new intermediary group, unless it already exists */
+        r = btrfs_qgroup_create(fd, new_qgroupid);
+        if (r < 0 && r != -EEXIST)
+                return r;
+        if (r >= 0)
+                changed = created = true;
+
+        /* Hook the new qgroup into all of the parent's qgroups ... */
+        for (i = 0; i < n; i++) {
+                r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
+                if (r < 0 && r != -EEXIST)
+                        goto undo;
+                if (r >= 0)
+                        changed = true;
+        }
+
+        /* ... and the subvolume's leaf qgroup into the new qgroup. */
+        r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
+        if (r < 0 && r != -EEXIST)
+                goto undo;
+        if (r >= 0)
+                changed = true;
+
+        return changed;
+
+undo:
+        /* Only take down what we created ourselves */
+        if (created)
+                (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
+
+        return r;
+}
+
+/* Path-based front end for btrfs_subvol_auto_qgroup_fd(): opens the directory 'path' read-only and
+ * passes 'subvol_id' and 'create_intermediary_qgroup' through.
+ *
+ * Returns -errno if 'path' cannot be opened as a directory, otherwise the result of the fd-based
+ * variant. */
+int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
+        _cleanup_close_ int dir_fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+
+        if (dir_fd < 0)
+                return -errno;
+
+        return btrfs_subvol_auto_qgroup_fd(dir_fd, subvol_id, create_intermediary_qgroup);
+}
+
+/* Determines the subvolume that contains the subvolume 'subvol_id'.
+ *
+ * If 'subvol_id' is 0 the id of the subvolume 'fd' refers to is used. The tree of tree roots is
+ * searched for a BTRFS_ROOT_BACKREF_KEY item whose object id is our subvolume; the offset of the first
+ * such item is stored in *ret.
+ *
+ * Returns 0 on success, -ENXIO if no such item exists, -ENOTTY if 'subvol_id' is non-zero and 'fd' is
+ * not on btrfs, or another negative errno-style value. */
+int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {
+
+        struct btrfs_ioctl_search_args args = {
+                /* Tree of tree roots */
+                .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
+
+                /* Look precisely for the subvolume items */
+                .key.min_type = BTRFS_ROOT_BACKREF_KEY,
+                .key.max_type = BTRFS_ROOT_BACKREF_KEY,
+
+                /* No restrictions on the other components */
+                .key.min_offset = 0,
+                .key.max_offset = (uint64_t) -1,
+
+                .key.min_transid = 0,
+                .key.max_transid = (uint64_t) -1,
+        };
+        int r;
+
+        assert(fd >= 0);
+        assert(ret);
+
+        if (subvol_id == 0) {
+                r = btrfs_subvol_get_id_fd(fd, &subvol_id);
+                if (r < 0)
+                        return r;
+        } else {
+                r = btrfs_is_filesystem(fd);
+                if (r < 0)
+                        return r;
+                if (!r)
+                        return -ENOTTY;
+        }
+
+        args.key.min_objectid = args.key.max_objectid = subvol_id;
+
+        while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+                const struct btrfs_ioctl_search_header *sh;
+                unsigned i;
+
+                args.key.nr_items = 256;
+                if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
+                        return negative_errno();
+
+                if (args.key.nr_items <= 0)
+                        break;
+
+                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+
+                        /* Make sure we start the next search at least from this entry. Without this
+                         * (and the increment below) a batch that contains no matching item would make
+                         * us issue the very same search over and over again. */
+                        btrfs_ioctl_search_args_set(&args, sh);
+
+                        if (sh->type != BTRFS_ROOT_BACKREF_KEY)
+                                continue;
+                        if (sh->objectid != subvol_id)
+                                continue;
+
+                        *ret = sh->offset;
+                        return 0;
+                }
+
+                /* Increase search key by one, to read the next item, if we can. */
+                if (!btrfs_ioctl_search_args_inc(&args))
+                        break;
+        }
+
+        return -ENXIO;
+}
diff --git a/src/basic/btrfs-util.h b/src/basic/btrfs-util.h
new file mode 100644
index 0000000..7d848a7
--- /dev/null
+++ b/src/basic/btrfs-util.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+#include "copy.h"
+#include "time-util.h"
+/* Description of a single btrfs subvolume. NOTE(review): presumably filled in by
+ * btrfs_subvol_get_info_fd(), which is declared further down with a BtrfsSubvolInfo out-parameter.
+ * Its implementation is not part of what is shown here, so the field notes below are inferred from the
+ * names only and need confirming. */
+typedef struct BtrfsSubvolInfo {
+ uint64_t subvol_id; /* presumably the id of the subvolume -- TODO confirm */
+ usec_t otime; /* presumably the creation time of the subvolume -- TODO confirm */
+
+ sd_id128_t uuid; /* presumably the subvolume's own UUID -- TODO confirm */
+ sd_id128_t parent_uuid; /* presumably the UUID of the subvolume this one was snapshotted from -- TODO confirm */
+
+ bool read_only; /* presumably whether the subvolume is marked read-only -- TODO confirm */
+} BtrfsSubvolInfo;
+/* Quota figures of a qgroup. Used as out-parameter type by btrfs_qgroup_get_quota*() and
+ * btrfs_subvol_get_subtree_quota*(), declared further down. NOTE(review): their implementations are
+ * not shown here; presumably 'referenced'/'exclusive' are current usage and the '_max' fields the
+ * configured limits -- confirm, including the value used for "no limit". */
+typedef struct BtrfsQuotaInfo {
+ uint64_t referenced;
+ uint64_t exclusive;
+ uint64_t referenced_max;
+ uint64_t exclusive_max;
+} BtrfsQuotaInfo;
+/* Bit flags accepted by btrfs_subvol_snapshot_fd_full(), btrfs_subvol_snapshot_full() and their
+ * wrappers; may be combined with bitwise OR. */
+typedef enum BtrfsSnapshotFlags {
+ BTRFS_SNAPSHOT_FALLBACK_COPY = 1 << 0, /* If the source isn't a subvolume, reflink everything */
+ BTRFS_SNAPSHOT_READ_ONLY = 1 << 1, /* Create the snapshot (or, in the fallback case, the copy) read-only */
+ BTRFS_SNAPSHOT_RECURSIVE = 1 << 2, /* Also snapshot the subvolumes nested below the source */
+ BTRFS_SNAPSHOT_QUOTA = 1 << 3, /* Copy qgroup hierarchy and limits to the snapshot, ignoring errors */
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY = 1 << 4, /* If the destination doesn't support subvolumes, reflink/copy instead */
+ BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE = 1 << 5, /* When we can't create a subvolume, use the FS_IMMUTABLE attribute for indicating read-only */
+} BtrfsSnapshotFlags;
+/* Bit flags accepted by btrfs_subvol_remove() and btrfs_subvol_remove_fd(); may be combined with
+ * bitwise OR. */
+typedef enum BtrfsRemoveFlags {
+ BTRFS_REMOVE_RECURSIVE = 1 << 0, /* If the subvolume is not empty, remove the subvolumes nested below it first */
+ BTRFS_REMOVE_QUOTA = 1 << 1, /* NOTE(review): not consulted by subvol_remove_children() in btrfs-util.c of this patch, which drops the qgroup regardless -- confirm intended meaning */
+} BtrfsRemoveFlags;
+
+int btrfs_is_filesystem(int fd);
+
+int btrfs_is_subvol_fd(int fd);
+int btrfs_is_subvol(const char *path);
+
+int btrfs_reflink(int infd, int outfd);
+int btrfs_clone_range(int infd, uint64_t in_offset, int ofd, uint64_t out_offset, uint64_t sz);
+
+int btrfs_get_block_device_fd(int fd, dev_t *dev);
+int btrfs_get_block_device(const char *path, dev_t *dev);
+
+int btrfs_defrag_fd(int fd);
+int btrfs_defrag(const char *p);
+
+int btrfs_quota_enable_fd(int fd, bool b);
+int btrfs_quota_enable(const char *path, bool b);
+
+int btrfs_quota_scan_start(int fd);
+int btrfs_quota_scan_wait(int fd);
+int btrfs_quota_scan_ongoing(int fd);
+
+int btrfs_subvol_make(const char *path);
+int btrfs_subvol_make_fd(int fd, const char *subvolume);
+
+int btrfs_subvol_snapshot_fd_full(int old_fd, const char *new_path, BtrfsSnapshotFlags flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+/* Convenience variant of btrfs_subvol_snapshot_fd_full() for callers that need no progress reporting:
+ * both progress callbacks and the userdata pointer are passed as NULL. */
+static inline int btrfs_subvol_snapshot_fd(int old_fd, const char *new_path, BtrfsSnapshotFlags flags) {
+        return btrfs_subvol_snapshot_fd_full(
+                        old_fd,
+                        new_path,
+                        flags,
+                        /* progress_path= */ NULL,
+                        /* progress_bytes= */ NULL,
+                        /* userdata= */ NULL);
+}
+
+int btrfs_subvol_snapshot_full(const char *old_path, const char *new_path, BtrfsSnapshotFlags flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+/* Convenience variant of btrfs_subvol_snapshot_full() for callers that need no progress reporting:
+ * both progress callbacks and the userdata pointer are passed as NULL. */
+static inline int btrfs_subvol_snapshot(const char *old_path, const char *new_path, BtrfsSnapshotFlags flags) {
+        return btrfs_subvol_snapshot_full(
+                        old_path,
+                        new_path,
+                        flags,
+                        /* progress_path= */ NULL,
+                        /* progress_bytes= */ NULL,
+                        /* userdata= */ NULL);
+}
+
+int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags);
+int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags);
+
+int btrfs_subvol_set_read_only_fd(int fd, bool b);
+int btrfs_subvol_set_read_only(const char *path, bool b);
+int btrfs_subvol_get_read_only_fd(int fd);
+
+int btrfs_subvol_get_id(int fd, const char *subvolume, uint64_t *ret);
+int btrfs_subvol_get_id_fd(int fd, uint64_t *ret);
+int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret);
+
+int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *info);
+
+int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret);
+
+int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *quota);
+int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *quota);
+
+int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max);
+int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max);
+
+/* Sets up a default qgroup hierarchy for a subvolume, derived from its parent subvolume. If
+ * insert_intermediary_qgroup is true the subvolume additionally gets a higher-level qgroup of its own.
+ * (Parameter name matches the definition in btrfs-util.c.) */
+int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup);
+int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup);
+
+int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret);
+int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id);
+
+int btrfs_qgroup_create(int fd, uint64_t qgroupid);
+int btrfs_qgroup_destroy(int fd, uint64_t qgroupid);
+int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid);
+
+int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max);
+int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max);
+
+int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid);
+
+int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent);
+int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent);
+
+int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret);
+
+int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *quota);
+int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *quota);
diff --git a/src/basic/build.h b/src/basic/build.h
new file mode 100644
index 0000000..7a59059
--- /dev/null
+++ b/src/basic/build.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "version.h"
+
+/* One block per optional feature: each turns a build-time switch into a "+NAME" or "-NAME" string literal. The
+ * switches are tested with #if rather than #ifdef, so a switch that is undefined or defined to 0 selects the
+ * "-NAME" form. The resulting tokens are concatenated into SYSTEMD_FEATURES further down in this header. */
+
+#if HAVE_PAM
+#define _PAM_FEATURE_ "+PAM"
+#else
+#define _PAM_FEATURE_ "-PAM"
+#endif
+
+#if HAVE_AUDIT
+#define _AUDIT_FEATURE_ "+AUDIT"
+#else
+#define _AUDIT_FEATURE_ "-AUDIT"
+#endif
+
+#if HAVE_SELINUX
+#define _SELINUX_FEATURE_ "+SELINUX"
+#else
+#define _SELINUX_FEATURE_ "-SELINUX"
+#endif
+
+#if HAVE_APPARMOR
+#define _APPARMOR_FEATURE_ "+APPARMOR"
+#else
+#define _APPARMOR_FEATURE_ "-APPARMOR"
+#endif
+
+#if ENABLE_IMA
+#define _IMA_FEATURE_ "+IMA"
+#else
+#define _IMA_FEATURE_ "-IMA"
+#endif
+
+#if ENABLE_SMACK
+#define _SMACK_FEATURE_ "+SMACK"
+#else
+#define _SMACK_FEATURE_ "-SMACK"
+#endif
+
+/* The switch is called HAVE_SYSV_COMPAT, the reported token SYSVINIT. */
+#if HAVE_SYSV_COMPAT
+#define _SYSVINIT_FEATURE_ "+SYSVINIT"
+#else
+#define _SYSVINIT_FEATURE_ "-SYSVINIT"
+#endif
+
+#if ENABLE_UTMP
+#define _UTMP_FEATURE_ "+UTMP"
+#else
+#define _UTMP_FEATURE_ "-UTMP"
+#endif
+
+#if HAVE_LIBCRYPTSETUP
+#define _LIBCRYPTSETUP_FEATURE_ "+LIBCRYPTSETUP"
+#else
+#define _LIBCRYPTSETUP_FEATURE_ "-LIBCRYPTSETUP"
+#endif
+
+#if HAVE_GCRYPT
+#define _GCRYPT_FEATURE_ "+GCRYPT"
+#else
+#define _GCRYPT_FEATURE_ "-GCRYPT"
+#endif
+
+#if HAVE_GNUTLS
+#define _GNUTLS_FEATURE_ "+GNUTLS"
+#else
+#define _GNUTLS_FEATURE_ "-GNUTLS"
+#endif
+
+#if HAVE_ACL
+#define _ACL_FEATURE_ "+ACL"
+#else
+#define _ACL_FEATURE_ "-ACL"
+#endif
+
+#if HAVE_XZ
+#define _XZ_FEATURE_ "+XZ"
+#else
+#define _XZ_FEATURE_ "-XZ"
+#endif
+
+#if HAVE_LZ4
+#define _LZ4_FEATURE_ "+LZ4"
+#else
+#define _LZ4_FEATURE_ "-LZ4"
+#endif
+
+#if HAVE_SECCOMP
+#define _SECCOMP_FEATURE_ "+SECCOMP"
+#else
+#define _SECCOMP_FEATURE_ "-SECCOMP"
+#endif
+
+#if HAVE_BLKID
+#define _BLKID_FEATURE_ "+BLKID"
+#else
+#define _BLKID_FEATURE_ "-BLKID"
+#endif
+
+#if HAVE_ELFUTILS
+#define _ELFUTILS_FEATURE_ "+ELFUTILS"
+#else
+#define _ELFUTILS_FEATURE_ "-ELFUTILS"
+#endif
+
+#if HAVE_KMOD
+#define _KMOD_FEATURE_ "+KMOD"
+#else
+#define _KMOD_FEATURE_ "-KMOD"
+#endif
+
+/* IDN2 and IDN are reported independently of each other; the switches carry a LIB prefix the tokens lack. */
+#if HAVE_LIBIDN2
+#define _IDN2_FEATURE_ "+IDN2"
+#else
+#define _IDN2_FEATURE_ "-IDN2"
+#endif
+
+#if HAVE_LIBIDN
+#define _IDN_FEATURE_ "+IDN"
+#else
+#define _IDN_FEATURE_ "-IDN"
+#endif
+
+#if HAVE_PCRE2
+#define _PCRE2_FEATURE_ "+PCRE2"
+#else
+#define _PCRE2_FEATURE_ "-PCRE2"
+#endif
+
+/* Reports which cgroup hierarchy the build defaults to. DEFAULT_HIERARCHY_NAME is not defined in this header; it
+ * has to expand to a string literal for the concatenation to compile. */
+#define _CGROUP_HIERARCHY_ "default-hierarchy=" DEFAULT_HIERARCHY_NAME
+
+/* Historic misspelling of the macro above, kept so that anything still using the old name keeps building. */
+#define _CGROUP_HIEARCHY_ _CGROUP_HIERARCHY_
+
+/* The complete feature string: all feature tokens separated by single spaces, followed by the default hierarchy.
+ * Everything is joined by string literal concatenation, so the result is one literal. */
+#define SYSTEMD_FEATURES                                                \
+        _PAM_FEATURE_ " "                                               \
+        _AUDIT_FEATURE_ " "                                             \
+        _SELINUX_FEATURE_ " "                                           \
+        _IMA_FEATURE_ " "                                               \
+        _APPARMOR_FEATURE_ " "                                          \
+        _SMACK_FEATURE_ " "                                             \
+        _SYSVINIT_FEATURE_ " "                                          \
+        _UTMP_FEATURE_ " "                                              \
+        _LIBCRYPTSETUP_FEATURE_ " "                                     \
+        _GCRYPT_FEATURE_ " "                                            \
+        _GNUTLS_FEATURE_ " "                                            \
+        _ACL_FEATURE_ " "                                               \
+        _XZ_FEATURE_ " "                                                \
+        _LZ4_FEATURE_ " "                                               \
+        _SECCOMP_FEATURE_ " "                                           \
+        _BLKID_FEATURE_ " "                                             \
+        _ELFUTILS_FEATURE_ " "                                          \
+        _KMOD_FEATURE_ " "                                              \
+        _IDN2_FEATURE_ " "                                              \
+        _IDN_FEATURE_ " "                                               \
+        _PCRE2_FEATURE_ " "                                             \
+        _CGROUP_HIERARCHY_
diff --git a/src/basic/bus-label.c b/src/basic/bus-label.c
new file mode 100644
index 0000000..1613cf7
--- /dev/null
+++ b/src/basic/bus-label.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "bus-label.h"
+#include "hexdecoct.h"
+#include "macro.h"
+
+char *bus_label_escape(const char *s) {
+        char *out, *w;
+        size_t i, n;
+
+        assert_return(s, NULL);
+
+        /* Turns an arbitrary string into a label that can be embedded in a D-Bus object path. ASCII letters are
+         * copied as they are, and so are ASCII digits unless they are the very first character. Every other
+         * byte is replaced by '_' followed by hexchar() of its upper and of its lower four bits. The empty
+         * string becomes "_". bus_label_unescape() reverses the encoding.
+         *
+         * Returns a newly allocated string, or NULL if memory is short. */
+
+        n = strlen(s);
+        if (n == 0)
+                return strdup("_");
+
+        /* In the worst case each input byte grows to three output bytes */
+        out = new(char, n*3 + 1);
+        if (!out)
+                return NULL;
+
+        w = out;
+        for (i = 0; i < n; i++) {
+                char c = s[i];
+
+                if ((c >= 'A' && c <= 'Z') ||
+                    (c >= 'a' && c <= 'z') ||
+                    (i > 0 && c >= '0' && c <= '9')) {
+                        *(w++) = c;
+                        continue;
+                }
+
+                *(w++) = '_';
+                *(w++) = hexchar(c >> 4);
+                *(w++) = hexchar(c);
+        }
+
+        *w = 0;
+
+        return out;
+}
+
+char *bus_label_unescape_n(const char *f, size_t l) {
+        char *out, *w;
+        size_t pos = 0;
+
+        assert_return(f, NULL);
+
+        /* Decodes the first l bytes of f: '_' followed by two characters unhexchar() accepts is turned into the
+         * byte they encode, everything else is copied unchanged. A label consisting of a single '_' decodes to
+         * the empty string. Returns a newly allocated string, or NULL if memory is short. */
+
+        if (l == 1 && f[0] == '_')
+                return strdup("");
+
+        /* Decoding never makes the string longer */
+        out = new(char, l + 1);
+        if (!out)
+                return NULL;
+
+        w = out;
+        while (pos < l) {
+                int hi, lo;
+
+                if (f[pos] != '_') {
+                        *(w++) = f[pos++];
+                        continue;
+                }
+
+                if (l - pos >= 3 &&
+                    (hi = unhexchar(f[pos + 1])) >= 0 &&
+                    (lo = unhexchar(f[pos + 2])) >= 0) {
+                        *(w++) = (char) ((hi << 4) | lo);
+                        pos += 3;
+                } else {
+                        /* Not a valid escape sequence, keep the underscore literally */
+                        *(w++) = '_';
+                        pos++;
+                }
+        }
+
+        *w = 0;
+
+        return out;
+}
diff --git a/src/basic/bus-label.h b/src/basic/bus-label.h
new file mode 100644
index 0000000..664cfaf
--- /dev/null
+++ b/src/basic/bus-label.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "string-util.h"
+
+/* Both functions return a string for the caller (bus_label_escape() via new()/strdup() in bus-label.c), or NULL. */
+char *bus_label_escape(const char *s);
+char *bus_label_unescape_n(const char *f, size_t l);
+
+/* Convenience wrapper that unescapes the whole string; the length is taken with strlen_ptr().
+ * NOTE(review): strlen_ptr() presumably tolerates a NULL f (returning 0) — confirm in string-util.h. */
+static inline char *bus_label_unescape(const char *f) {
+ return bus_label_unescape_n(f, strlen_ptr(f));
+}
diff --git a/src/basic/cap-list.c b/src/basic/cap-list.c
new file mode 100644
index 0000000..29a17d9
--- /dev/null
+++ b/src/basic/cap-list.c
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "cap-list.h"
+#include "extract-word.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "util.h"
+
+/* Forward declaration of the name lookup used by capability_from_name().
+ * NOTE(review): the definition is presumably supplied by the generated cap-from-name.h included right below (the
+ * GPERF_LEN_TYPE parameter suggests gperf output) — confirm. */
+static const struct capability_name* lookup_capability(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "cap-from-name.h"
+#include "cap-to-name.h"
+
+const char *capability_to_name(int id) {
+        /* Looks up the name stored for a capability number in capability_names[]. Negative numbers and numbers
+         * at or beyond the end of the table yield NULL. */
+
+        if (id < 0 || (size_t) id >= ELEMENTSOF(capability_names))
+                return NULL;
+
+        return capability_names[id];
+}
+
+int capability_from_name(const char *name) {
+        const struct capability_name *entry;
+        int number;
+
+        assert(name);
+
+        /* Accepts either a number or a name. Returns the capability number, or -EINVAL if the number is outside
+         * of capability_names[] or the name is not known to lookup_capability(). */
+
+        /* Anything safe_atoi() can parse is treated as a number and never looked up as a name */
+        if (safe_atoi(name, &number) >= 0) {
+                if (number < 0 || (size_t) number >= ELEMENTSOF(capability_names))
+                        return -EINVAL;
+
+                return number;
+        }
+
+        entry = lookup_capability(name, strlen(name));
+        if (!entry)
+                return -EINVAL;
+
+        return entry->id;
+}
+
+/* Returns the number of slots in the capability_names[] table. This is a property of the table compiled in, not
+ * of the running kernel (compare cap_last_cap() used elsewhere in this file). */
+int capability_list_length(void) {
+ return (int) ELEMENTSOF(capability_names);
+}
+
+int capability_set_to_string_alloc(uint64_t set, char **s) {
+        _cleanup_free_ char *str = NULL;
+        unsigned long i;
+        size_t allocated = 0, n = 0;
+
+        assert(s);
+
+        /* Formats the capabilities whose bits are set in 'set' as a list of names separated by single spaces and
+         * stores the newly allocated string in *s; if no bit in range is set this is the empty string. Bits
+         * above cap_last_cap() are ignored.
+         *
+         * Returns 0 on success, -EINVAL if a set bit has no name in capability_names[], and -ENOMEM if memory is
+         * short. *s is only written on success. */
+
+        /* cap_last_cap() is the number of the highest capability, not a count, hence the bound is inclusive.
+         * Stopping one short would silently leave the highest capability out of the string. */
+        for (i = 0; i <= cap_last_cap(); i++)
+                if (set & (UINT64_C(1) << i)) {
+                        const char *p;
+                        size_t add;
+
+                        p = capability_to_name(i);
+                        if (!p)
+                                return -EINVAL;
+
+                        add = strlen(p);
+
+                        /* Room for the name, the separating space and the trailing NUL */
+                        if (!GREEDY_REALLOC(str, allocated, n + add + 2))
+                                return -ENOMEM;
+
+                        strcpy(mempcpy(str + n, p, add), " ");
+                        n += add + 1;
+                }
+
+        /* Makes sure there is a buffer even if the loop above added nothing */
+        if (!GREEDY_REALLOC(str, allocated, n + 1))
+                return -ENOMEM;
+
+        str[n > 0 ? n - 1 : 0] = '\0'; /* truncate the last space, if it's there */
+
+        *s = TAKE_PTR(str);
+
+        return 0;
+}
+
+int capability_set_from_string(const char *s, uint64_t *set) {
+        const char *cursor = s;
+        uint64_t mask = 0;
+
+        assert(set);
+
+        /* Splits s into words with extract_first_word() (EXTRACT_QUOTES) and sets the bit of every word that
+         * capability_from_name() resolves. Words that do not resolve are skipped without an error. Parsing stops
+         * at the first extraction result <= 0; of the extraction errors only -ENOMEM is returned to the caller.
+         * In all other cases *set receives what was collected and 0 is returned. */
+
+        for (;;) {
+                _cleanup_free_ char *word = NULL;
+                int r;
+
+                r = extract_first_word(&cursor, &word, NULL, EXTRACT_QUOTES);
+                if (r == -ENOMEM)
+                        return r;
+                if (r <= 0)
+                        break;
+
+                r = capability_from_name(word);
+                if (r >= 0)
+                        mask |= UINT64_C(1) << (uint64_t) r;
+        }
+
+        *set = mask;
+
+        return 0;
+}
diff --git a/src/basic/cap-list.h b/src/basic/cap-list.h
new file mode 100644
index 0000000..ab41924
--- /dev/null
+++ b/src/basic/cap-list.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* Conversion between capability numbers and names, see cap-list.c.
+ * NOTE(review): the prototypes below use uint64_t, but this header includes nothing itself — it appears to rely
+ * on the includer to have pulled in <stdint.h>/<inttypes.h>; confirm. */
+const char *capability_to_name(int id);
+int capability_from_name(const char *name);
+int capability_list_length(void);
+
+/* Conversion between a capability bit mask and a space separated list of capability names. */
+int capability_set_to_string_alloc(uint64_t set, char **s);
+int capability_set_from_string(const char *s, uint64_t *set);
diff --git a/src/basic/cap-to-name.awk b/src/basic/cap-to-name.awk
new file mode 100644
index 0000000..402a782
--- /dev/null
+++ b/src/basic/cap-to-name.awk
@@ -0,0 +1,9 @@
+# Emits the C source of the capability_names[] table: one designated initializer per input line, using the first
+# field of the line as the array index expression and its lower-cased form as the string.
+BEGIN{
+ print "static const char* const capability_names[] = { "
+}
+# Runs for every input line; $1 is the first whitespace separated field.
+{
+ printf " [%s] = \"%s\",\n", $1, tolower($1)
+}
+END{
+ print "};"
+}
diff --git a/src/basic/capability-util.c b/src/basic/capability-util.c
new file mode 100644
index 0000000..b944ee6
--- /dev/null
+++ b/src/basic/capability-util.c
@@ -0,0 +1,487 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <grp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/capability.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_prctl.h"
+#include "parse-util.h"
+#include "user-util.h"
+#include "util.h"
+
+int have_effective_cap(int value) {
+        _cleanup_cap_free_ cap_t caps = NULL;
+        cap_flag_value_t state;
+
+        /* Checks whether the capability 'value' is in the effective set of the calling process. Returns > 0 if
+         * it is, 0 if it is not, and a negative errno if the capability state cannot be queried. */
+
+        caps = cap_get_proc();
+        if (!caps)
+                return -errno;
+
+        if (cap_get_flag(caps, value, CAP_EFFECTIVE, &state) < 0)
+                return -errno;
+
+        return state == CAP_SET;
+}
+
+unsigned long cap_last_cap(void) {
+        static thread_local unsigned long cached;
+        static thread_local bool have_cached = false;
+        _cleanup_free_ char *content = NULL;
+        unsigned long p = 0;
+
+        /* Returns the number of the highest capability the running kernel knows. The value is determined once
+         * per thread and remembered afterwards. */
+
+        if (have_cached)
+                return cached;
+
+        /* available since linux-3.2 */
+        if (read_one_line_file("/proc/sys/kernel/cap_last_cap", &content) >= 0 &&
+            safe_atolu(content, &p) >= 0) {
+                cached = p;
+                have_cached = true;
+                return p;
+        }
+
+        /* fall back to syscall-probing for pre linux-3.2, starting at the value known at build time */
+        p = (unsigned long) CAP_LAST_CAP;
+
+        if (prctl(PR_CAPBSET_READ, p) >= 0) {
+
+                /* The kernel knows this one; move up for as long as the next one is known, too */
+                while (prctl(PR_CAPBSET_READ, p+1) >= 0)
+                        p++;
+
+        } else {
+
+                /* The kernel doesn't know this one; move down until we hit one it knows, or reach zero */
+                p--;
+                while (p > 0 && prctl(PR_CAPBSET_READ, p) < 0)
+                        p--;
+        }
+
+        cached = p;
+        have_cached = true;
+
+        return p;
+}
+
+int capability_update_inherited_set(cap_t caps, uint64_t set) {
+        unsigned long i;
+
+        /* Marks every capability whose bit is set in 'set' as inheritable in 'caps'. Only the cap_t object is
+         * changed, nothing is applied to the process here. Returns 0 on success, a negative errno if
+         * cap_set_flag() fails. */
+
+        /* cap_last_cap() is the number of the highest capability, not a count, so it has to be included —
+         * otherwise a request for the highest capability would be ignored without any error. */
+        for (i = 0; i <= cap_last_cap(); i++) {
+                cap_value_t v;
+
+                if (!(set & (UINT64_C(1) << i)))
+                        continue;
+
+                v = (cap_value_t) i;
+
+                /* Make the capability inheritable. */
+                if (cap_set_flag(caps, CAP_INHERITABLE, 1, &v, CAP_SET) < 0)
+                        return -errno;
+        }
+
+        return 0;
+}
+
+int capability_ambient_set_apply(uint64_t set, bool also_inherit) {
+        _cleanup_cap_free_ cap_t caps = NULL;
+        unsigned long i;
+
+        /* Raises every capability whose bit is set in 'set' in the ambient set of the calling process. With
+         * also_inherit the same capabilities are first added to the inheritable set of the process. Returns 0
+         * on success and a negative errno on the first failure; capabilities raised before the failure stay
+         * raised. */
+
+        if (also_inherit) {
+                int r;
+
+                caps = cap_get_proc();
+                if (!caps)
+                        return -errno;
+
+                /* The helper already hands back a negative errno; errno itself may have been overwritten since,
+                 * so pass the helper's result on rather than reading errno again. */
+                r = capability_update_inherited_set(caps, set);
+                if (r < 0)
+                        return r;
+
+                if (cap_set_proc(caps) < 0)
+                        return -errno;
+        }
+
+        /* cap_last_cap() is the number of the highest capability, not a count, hence the inclusive bound. */
+        for (i = 0; i <= cap_last_cap(); i++) {
+
+                if (!(set & (UINT64_C(1) << i)))
+                        continue;
+
+                /* Add the capability to the ambient set. */
+                if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) < 0)
+                        return -errno;
+        }
+
+        return 0;
+}
+
+/* Drops every capability whose bit is NOT set in 'keep' from the bounding set (PR_CAPBSET_DROP) and from the
+ * inheritable set of the calling process. With right_now the capability is removed from the permitted and
+ * effective sets as well. Returns 0 on success and a negative errno otherwise. */
+int capability_bounding_set_drop(uint64_t keep, bool right_now) {
+ _cleanup_cap_free_ cap_t before_cap = NULL, after_cap = NULL;
+ cap_flag_value_t fv;
+ unsigned long i;
+ int r;
+
+ /* If we are run as PID 1 we will lack CAP_SETPCAP by default
+ * in the effective set (yes, the kernel drops that when
+ * executing init!), so get it back temporarily so that we can
+ * call PR_CAPBSET_DROP. */
+
+ before_cap = cap_get_proc();
+ if (!before_cap)
+ return -errno;
+
+ if (cap_get_flag(before_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0)
+ return -errno;
+
+ if (fv != CAP_SET) {
+ _cleanup_cap_free_ cap_t temp_cap = NULL;
+ static const cap_value_t v = CAP_SETPCAP;
+
+ temp_cap = cap_dup(before_cap);
+ if (!temp_cap)
+ return -errno;
+
+ if (cap_set_flag(temp_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0)
+ return -errno;
+
+ if (cap_set_proc(temp_cap) < 0)
+ log_debug_errno(errno, "Can't acquire effective CAP_SETPCAP bit, ignoring: %m");
+
+ /* If we didn't manage to acquire the CAP_SETPCAP bit, we continue anyway, after all this just means
+ * we'll fail later, when we actually intend to drop some capabilities. */
+ }
+
+ /* after_cap starts out as a copy of the state from before the temporary CAP_SETPCAP raise, so applying it at
+ * the end also undoes that raise. */
+ after_cap = cap_dup(before_cap);
+ if (!after_cap)
+ return -errno;
+
+ /* cap_last_cap() is the number of the highest capability, hence the inclusive bound */
+ for (i = 0; i <= cap_last_cap(); i++) {
+ cap_value_t v;
+
+ if ((keep & (UINT64_C(1) << i)))
+ continue;
+
+ /* Drop it from the bounding set */
+ if (prctl(PR_CAPBSET_DROP, i) < 0) {
+ r = -errno;
+
+ /* If dropping the capability failed, let's see if we didn't have it in the first place. If so,
+ * continue anyway, as dropping a capability we didn't have in the first place doesn't really
+ * matter anyway. */
+ if (prctl(PR_CAPBSET_READ, i) != 0)
+ goto finish;
+ }
+ v = (cap_value_t) i;
+
+ /* Also drop it from the inheritable set, so
+ * that anything we exec() loses the
+ * capability for good. */
+ if (cap_set_flag(after_cap, CAP_INHERITABLE, 1, &v, CAP_CLEAR) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ /* If we shall apply this right now drop it
+ * also from our own capability sets. */
+ if (right_now) {
+ if (cap_set_flag(after_cap, CAP_PERMITTED, 1, &v, CAP_CLEAR) < 0 ||
+ cap_set_flag(after_cap, CAP_EFFECTIVE, 1, &v, CAP_CLEAR) < 0) {
+ r = -errno;
+ goto finish;
+ }
+ }
+ }
+
+ r = 0;
+
+finish:
+ /* Reached on success and on failure alike: whatever was accumulated in after_cap gets applied. An error from
+ * this step replaces an earlier error stored in r. */
+ if (cap_set_proc(after_cap) < 0) {
+ /* If there are no actual changes anyway then let's ignore this error. */
+ if (cap_compare(before_cap, after_cap) != 0)
+ r = -errno;
+ }
+
+ return r;
+}
+
+static int drop_from_file(const char *fn, uint64_t keep) {
+        _cleanup_free_ char *line = NULL;
+        uint64_t before, reduced;
+        uint32_t hi, lo;
+        int r;
+
+        /* The file fn holds a 64 bit mask as two unsigned decimal numbers, lower 32 bits first. Clears every bit
+         * that is not set in 'keep' and writes the mask back, unless nothing would change. Returns 0 on success,
+         * -EIO if the file does not contain two numbers, and otherwise whatever reading or writing reports. */
+
+        /* The variables are passed to "%u" conversions below, make sure the sizes match */
+        assert_cc(sizeof(hi) == sizeof(unsigned));
+        assert_cc(sizeof(lo) == sizeof(unsigned));
+
+        r = read_one_line_file(fn, &line);
+        if (r < 0)
+                return r;
+
+        if (sscanf(line, "%u %u", &lo, &hi) != 2)
+                return -EIO;
+
+        before = ((uint64_t) hi << 32ULL) | (uint64_t) lo;
+        reduced = before & keep;
+
+        if (reduced == before)
+                return 0;
+
+        lo = (unsigned) (reduced & 0xFFFFFFFFULL);
+        hi = (unsigned) ((reduced >> 32ULL) & 0xFFFFFFFFULL);
+
+        return write_string_filef(fn, WRITE_STRING_FILE_CREATE, "%u %u", lo, hi);
+}
+
+int capability_bounding_set_drop_usermode(uint64_t keep) {
+        int r;
+
+        /* Restricts the two usermodehelper capability masks below /proc/sys/kernel/usermodehelper/ to 'keep',
+         * the inheritable one first. The second file is left alone if the first one fails. */
+
+        r = drop_from_file("/proc/sys/kernel/usermodehelper/inheritable", keep);
+        if (r < 0)
+                return r;
+
+        return drop_from_file("/proc/sys/kernel/usermodehelper/bset", keep);
+}
+
+/* Changes the calling process over to the given uid and gid (real, effective and saved IDs), clears the
+ * supplementary groups via maybe_setgroups(0, NULL), reduces the bounding set to keep_capabilities and finally makes
+ * exactly the capabilities in keep_capabilities permitted and effective. Every failure is logged before it is
+ * returned. Returns 0 on success.
+ * NOTE(review): the error paths return whatever log_error_errno()/log_oom() return — presumably the negative error
+ * code; confirm in log.h. */
+int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) {
+ _cleanup_cap_free_ cap_t d = NULL;
+ unsigned i, j = 0;
+ int r;
+
+ /* Unfortunately we cannot leave privilege dropping to PID 1
+ * here, since we want to run as user but want to keep some
+ * capabilities. Since file capabilities have been introduced
+ * this cannot be done across exec() anymore, unless our
+ * binary has the capability configured in the file system,
+ * which we want to avoid. */
+
+ /* The group is changed first, while we still are privileged enough to do so */
+ if (setresgid(gid, gid, gid) < 0)
+ return log_error_errno(errno, "Failed to change group ID: %m");
+
+ r = maybe_setgroups(0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop auxiliary groups list: %m");
+
+ /* Ensure we keep the permitted caps across the setresuid() */
+ if (prctl(PR_SET_KEEPCAPS, 1) < 0)
+ return log_error_errno(errno, "Failed to enable keep capabilities flag: %m");
+
+ if (setresuid(uid, uid, uid) < 0)
+ return log_error_errno(errno, "Failed to change user ID: %m");
+
+ if (prctl(PR_SET_KEEPCAPS, 0) < 0)
+ return log_error_errno(errno, "Failed to disable keep capabilities flag: %m");
+
+ /* Drop all caps from the bounding set, except the ones we want */
+ r = capability_bounding_set_drop(keep_capabilities, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop capabilities: %m");
+
+ /* Now upgrade the permitted caps we still kept to effective caps */
+ d = cap_init();
+ if (!d)
+ return log_oom();
+
+ /* If nothing is to be kept, the freshly initialized set in d is never applied */
+ if (keep_capabilities) {
+ /* One slot per bit position up to the highest bit set in keep_capabilities.
+ * NOTE(review): assumes u64log2() yields the index of the highest set bit — the asserts below check
+ * exactly that. */
+ cap_value_t bits[u64log2(keep_capabilities) + 1];
+
+ /* Collect the numbers of the set bits; j counts how many were found */
+ for (i = 0; i < ELEMENTSOF(bits); i++)
+ if (keep_capabilities & (1ULL << i))
+ bits[j++] = i;
+
+ /* use enough bits */
+ assert(i == 64 || (keep_capabilities >> i) == 0);
+ /* don't use too many bits */
+ assert(keep_capabilities & (1ULL << (i - 1)));
+
+ if (cap_set_flag(d, CAP_EFFECTIVE, j, bits, CAP_SET) < 0 ||
+ cap_set_flag(d, CAP_PERMITTED, j, bits, CAP_SET) < 0)
+ return log_error_errno(errno, "Failed to enable capabilities bits: %m");
+
+ if (cap_set_proc(d) < 0)
+ return log_error_errno(errno, "Failed to increase capabilities: %m");
+ }
+
+ return 0;
+}
+
+int drop_capability(cap_value_t cv) {
+        static const cap_flag_t sets[] = {
+                CAP_INHERITABLE,
+                CAP_PERMITTED,
+                CAP_EFFECTIVE,
+        };
+        _cleanup_cap_free_ cap_t caps = NULL;
+        size_t k;
+
+        /* Removes a single capability from the inheritable, permitted and effective sets of the calling
+         * process. Returns 0 on success and a negative errno on failure; nothing is applied to the process
+         * unless all three sets could be updated. */
+
+        caps = cap_get_proc();
+        if (!caps)
+                return -errno;
+
+        for (k = 0; k < ELEMENTSOF(sets); k++)
+                if (cap_set_flag(caps, sets[k], 1, &cv, CAP_CLEAR) < 0)
+                        return -errno;
+
+        if (cap_set_proc(caps) < 0)
+                return -errno;
+
+        return 0;
+}
+
+bool ambient_capabilities_supported(void) {
+        static int cache = -1;
+
+        /* Tells whether PR_CAP_AMBIENT works here. The kernel is asked once, the answer is remembered in a
+         * static variable. */
+
+        if (cache < 0) {
+                /* A successful query means yes. A failed query only means no if the error is one of EINVAL,
+                 * EOPNOTSUPP or ENOSYS; any other error is taken as a sign that the call as such exists. */
+                if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_KILL, 0, 0) >= 0)
+                        cache = 1;
+                else
+                        cache = !IN_SET(errno, EINVAL, EOPNOTSUPP, ENOSYS);
+        }
+
+        return cache;
+}
+
+/* Applies the capability sets stored in q to the calling process. A member that is (uint64_t) -1 counts as unset
+ * and is left alone. The order is: ambient first, then inheritable/permitted/effective, the bounding set last.
+ * Returns 0 on success and a negative errno on the first failure; changes made up to that point are not rolled
+ * back. */
+int capability_quintet_enforce(const CapabilityQuintet *q) {
+ _cleanup_cap_free_ cap_t c = NULL;
+ int r;
+
+ if (q->ambient != (uint64_t) -1) {
+ unsigned long i;
+ bool changed = false;
+
+ c = cap_get_proc();
+ if (!c)
+ return -errno;
+
+ /* In order to raise the ambient caps set we first need to raise the matching inheritable + permitted
+ * cap */
+ for (i = 0; i <= cap_last_cap(); i++) {
+ uint64_t m = UINT64_C(1) << i;
+ cap_value_t cv = (cap_value_t) i;
+ cap_flag_value_t old_value_inheritable, old_value_permitted;
+
+ if ((q->ambient & m) == 0)
+ continue;
+
+ if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value_inheritable) < 0)
+ return -errno;
+ if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value_permitted) < 0)
+ return -errno;
+
+ /* Nothing to do if both are raised already */
+ if (old_value_inheritable == CAP_SET && old_value_permitted == CAP_SET)
+ continue;
+
+ if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, CAP_SET) < 0)
+ return -errno;
+
+ if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
+ return -errno;
+
+ changed = true;
+ }
+
+ /* Only talk to the kernel if the loop above actually modified c */
+ if (changed)
+ if (cap_set_proc(c) < 0)
+ return -errno;
+
+ r = capability_ambient_set_apply(q->ambient, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (q->inheritable != (uint64_t) -1 || q->permitted != (uint64_t) -1 || q->effective != (uint64_t) -1) {
+ bool changed = false;
+ unsigned long i;
+
+ /* Reuse the state fetched (and possibly modified) by the ambient step above, if there is one */
+ if (!c) {
+ c = cap_get_proc();
+ if (!c)
+ return -errno;
+ }
+
+ /* For each capability, make each of the three sets that is configured match its bit in q exactly: bits
+ * that are set get raised, bits that are clear get lowered. */
+ for (i = 0; i <= cap_last_cap(); i++) {
+ uint64_t m = UINT64_C(1) << i;
+ cap_value_t cv = (cap_value_t) i;
+
+ if (q->inheritable != (uint64_t) -1) {
+ cap_flag_value_t old_value, new_value;
+
+ if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value) < 0)
+ return -errno;
+
+ new_value = (q->inheritable & m) ? CAP_SET : CAP_CLEAR;
+
+ if (old_value != new_value) {
+ changed = true;
+
+ if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, new_value) < 0)
+ return -errno;
+ }
+ }
+
+ if (q->permitted != (uint64_t) -1) {
+ cap_flag_value_t old_value, new_value;
+
+ if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value) < 0)
+ return -errno;
+
+ new_value = (q->permitted & m) ? CAP_SET : CAP_CLEAR;
+
+ if (old_value != new_value) {
+ changed = true;
+
+ if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, new_value) < 0)
+ return -errno;
+ }
+ }
+
+ if (q->effective != (uint64_t) -1) {
+ cap_flag_value_t old_value, new_value;
+
+ if (cap_get_flag(c, cv, CAP_EFFECTIVE, &old_value) < 0)
+ return -errno;
+
+ new_value = (q->effective & m) ? CAP_SET : CAP_CLEAR;
+
+ if (old_value != new_value) {
+ changed = true;
+
+ if (cap_set_flag(c, CAP_EFFECTIVE, 1, &cv, new_value) < 0)
+ return -errno;
+ }
+ }
+ }
+
+ if (changed)
+ if (cap_set_proc(c) < 0)
+ return -errno;
+ }
+
+ /* The bounding set can only shrink: everything outside of q->bounding is dropped. With right_now == false the
+ * permitted and effective sets configured above are not touched by this. */
+ if (q->bounding != (uint64_t) -1) {
+ r = capability_bounding_set_drop(q->bounding, false);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/basic/capability-util.h b/src/basic/capability-util.h
new file mode 100644
index 0000000..02c7d5c
--- /dev/null
+++ b/src/basic/capability-util.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/capability.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "missing_capability.h"
+#include "util.h"
+
+/* Mask with all 64 bits set. The expansion is parenthesized so that it stays a single operand wherever the macro
+ * is used, e.g. "sizeof CAP_ALL" measures the uint64_t instead of parsing as "sizeof(uint64_t) - 1". */
+#define CAP_ALL ((uint64_t) -1)
+
+/* Implemented in capability-util.c. The uint64_t arguments are bit masks indexed by capability number. */
+unsigned long cap_last_cap(void);
+int have_effective_cap(int value);
+int capability_bounding_set_drop(uint64_t keep, bool right_now);
+int capability_bounding_set_drop_usermode(uint64_t keep);
+
+int capability_ambient_set_apply(uint64_t set, bool also_inherit);
+/* NOTE(review): the mask parameter is called ambient_set here but plainly "set" in the definition in
+ * capability-util.c; harmless in C, but worth aligning. */
+int capability_update_inherited_set(cap_t caps, uint64_t ambient_set);
+
+int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities);
+
+int drop_capability(cap_value_t cv);
+
+/* Cleanup support for cap_t variables: "_cleanup_cap_free_ cap_t c = NULL;" releases c when it goes out of scope.
+ * NOTE(review): relies on DEFINE_TRIVIAL_CLEANUP_FUNC() generating the cap_freep() function referenced below —
+ * confirm in macro.h. */
+DEFINE_TRIVIAL_CLEANUP_FUNC(cap_t, cap_free);
+#define _cleanup_cap_free_ _cleanup_(cap_freep)
+
+/* The same for char* strings that have to be released with cap_free(); NULL pointers are skipped. */
+static inline void cap_free_charpp(char **p) {
+ if (*p)
+ cap_free(*p);
+}
+#define _cleanup_cap_free_charp_ _cleanup_(cap_free_charpp)
+
+static inline bool cap_test_all(uint64_t caps) {
+        /* Tests 'caps' against a mask that has bits 0 up to and including cap_last_cap() set. */
+        const uint64_t all = (UINT64_C(1) << (cap_last_cap() + 1)) - 1;
+
+        return FLAGS_SET(caps, all);
+}
+
+bool ambient_capabilities_supported(void);
+
+/* Identical to linux/capability.h's CAP_TO_MASK(), but uses an unsigned 1U instead of a signed 1 for shifting left, in
+ * order to avoid complaints about shifting a signed int left by 31 bits, which would make it negative. */
+#define CAP_TO_MASK_CORRECTED(x) (1U << ((x) & 31U))
+
+typedef struct CapabilityQuintet {
+ /* Stores all five types of capabilities in one go. Note that we use (uint64_t) -1 for unset here. This hence
+ * needs to be updated as soon as Linux learns more than 63 caps. */
+ uint64_t effective;
+ uint64_t bounding;
+ uint64_t inheritable;
+ uint64_t permitted;
+ uint64_t ambient;
+} CapabilityQuintet;
+
+/* All capability numbers known at build time have to fit into the 64 bit masks above */
+assert_cc(CAP_LAST_CAP < 64);
+
+/* Initializer that marks all five sets as unset. It is positional, i.e. it follows the member order of the
+ * structure. */
+#define CAPABILITY_QUINTET_NULL { (uint64_t) -1, (uint64_t) -1, (uint64_t) -1, (uint64_t) -1, (uint64_t) -1 }
+
+static inline bool capability_quintet_is_set(const CapabilityQuintet *q) {
+        /* True as soon as at least one of the five sets differs from the (uint64_t) -1 "unset" marker. */
+        const uint64_t unset = (uint64_t) -1;
+
+        return !(q->effective == unset &&
+                 q->bounding == unset &&
+                 q->inheritable == unset &&
+                 q->permitted == unset &&
+                 q->ambient == unset);
+}
+
+/* Applies the sets of q that are not (uint64_t) -1 to the calling process; implemented in capability-util.c. */
+int capability_quintet_enforce(const CapabilityQuintet *q);
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
new file mode 100644
index 0000000..8ce7ccb
--- /dev/null
+++ b/src/basic/cgroup-util.c
@@ -0,0 +1,2933 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <ftw.h>
+#include <limits.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "login-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "set.h"
+#include "special.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "user-util.h"
+
+int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
+        _cleanup_free_ char *procs = NULL;
+        FILE *stream;
+        int r;
+
+        assert(_f);
+
+        /* Opens the "cgroup.procs" attribute of the given cgroup for reading. On success 0 is returned and *_f
+         * is set to the stream, which the caller then has to close. Otherwise a negative errno-style error is
+         * returned and *_f is not touched. */
+
+        r = cg_get_path(controller, path, "cgroup.procs", &procs);
+        if (r < 0)
+                return r;
+
+        stream = fopen(procs, "re");
+        if (!stream)
+                return -errno;
+
+        *_f = stream;
+        return 0;
+}
+
+int cg_read_pid(FILE *f, pid_t *_pid) {
+        unsigned long ul;
+        pid_t pid;
+
+        /* Reads the next decimal number from f and stores it in *_pid. Returns 1 if a PID was read, 0 at the end
+         * of the file, and a negative errno-style error otherwise; -EIO is used for input that is not a usable
+         * PID. *_pid is only written when 1 is returned.
+         *
+         * Note that the cgroup.procs might contain duplicates! See
+         * cgroups.txt for details. */
+
+        assert(f);
+        assert(_pid);
+
+        errno = 0;
+        if (fscanf(f, "%lu", &ul) != 1) {
+
+                if (feof(f))
+                        return 0;
+
+                return errno > 0 ? -errno : -EIO;
+        }
+
+        /* Refuse zero, and also anything that does not survive the conversion to pid_t: handing a truncated or
+         * negative number on would make the caller act on the wrong process (or on a process group). */
+        pid = (pid_t) ul;
+        if (pid <= 0 || (unsigned long) pid != ul)
+                return -EIO;
+
+        *_pid = pid;
+        return 1;
+}
+
+int cg_read_event(
+                const char *controller,
+                const char *path,
+                const char *event,
+                char **val) {
+
+        _cleanup_free_ char *events = NULL, *content = NULL;
+        char *p, *line;
+        int r;
+
+        /* Looks up 'event' in the "cgroup.events" attribute of the given cgroup. The attribute is read as a list
+         * of lines of the form "<key> <value>". On a match a copy of the value is stored in *val, which the
+         * caller has to free, and 0 is returned.
+         *
+         * Returns -EINVAL if a line without a space is encountered before a match, -ENOENT if all lines were
+         * searched without a match, -ENOMEM if the value cannot be copied, and otherwise whatever locating or
+         * reading the attribute reports. *val is only written on success.
+         *
+         * NOTE(review): strsep() yields one more, empty, "line" after a trailing newline, which then takes the
+         * -EINVAL path. -ENOENT is hence only reached if the content does not end in a newline. Left as it is;
+         * confirm whether callers rely on it. */
+
+        r = cg_get_path(controller, path, "cgroup.events", &events);
+        if (r < 0)
+                return r;
+
+        r = read_full_file(events, &content, NULL);
+        if (r < 0)
+                return r;
+
+        p = content;
+        while ((line = strsep(&p, "\n"))) {
+                char *key, *copy;
+
+                /* Cuts the key off; what remains in 'line' afterwards is the value */
+                key = strsep(&line, " ");
+                if (!key || !line)
+                        return -EINVAL;
+
+                if (!streq(key, event))
+                        continue;
+
+                /* Don't claim success without a value if we are out of memory */
+                copy = strdup(line);
+                if (!copy)
+                        return -ENOMEM;
+
+                *val = copy;
+                return 0;
+        }
+
+        return -ENOENT;
+}
+
+bool cg_ns_supported(void) {
+        static thread_local int enabled = -1;
+
+        /* Reports whether /proc/self/ns/cgroup can be accessed. The check is done once per thread and the
+         * result is remembered. */
+
+        if (enabled >= 0)
+                return enabled;
+
+        if (access("/proc/self/ns/cgroup", F_OK) >= 0) {
+                enabled = true;
+                return enabled;
+        }
+
+        /* A missing file is the expected way of saying "no", anything else deserves a debug message */
+        if (errno != ENOENT)
+                log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
+
+        enabled = false;
+        return enabled;
+}
+
+int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
+        _cleanup_free_ char *dir_path = NULL;
+        DIR *dir;
+        int r;
+
+        assert(_d);
+
+        /* Opens the directory of the given cgroup so that its immediate children can be read from it. This is
+         * not recursive! On success 0 is returned and *_d is set to the directory stream, which the caller then
+         * has to close. Otherwise a negative errno-style error is returned and *_d is not touched. */
+
+        r = cg_get_path(controller, path, NULL, &dir_path);
+        if (r < 0)
+                return r;
+
+        dir = opendir(dir_path);
+        if (!dir)
+                return -errno;
+
+        *_d = dir;
+        return 0;
+}
+
+/* Advances d to the next entry that is a directory other than "." and "..". Returns 1 and stores a newly allocated
+ * copy of the entry name in *fn (to be freed by the caller), 0 when there are no more entries, and a negative
+ * errno-style error otherwise. *fn is only written when 1 is returned. */
+int cg_read_subgroup(DIR *d, char **fn) {
+ struct dirent *de;
+
+ assert(d);
+ assert(fn);
+
+ /* NOTE(review): the third macro argument is presumably the statement executed when reading the directory
+ * fails — confirm in dirent-util.h. */
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ char *b;
+
+ /* Only entries that report DT_DIR are considered; other types, DT_UNKNOWN included, are skipped */
+ if (de->d_type != DT_DIR)
+ continue;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ b = strdup(de->d_name);
+ if (!b)
+ return -ENOMEM;
+
+ *fn = b;
+ return 1;
+ }
+
+ return 0;
+}
+
+int cg_rmdir(const char *controller, const char *path) {
+        _cleanup_free_ char *dir_path = NULL;
+        int r;
+
+        /* Removes the directory of the given cgroup; a directory that is gone already is not an error. If
+         * cg_hybrid_unified() reports > 0 and the controller is SYSTEMD_CGROUP_CONTROLLER, the cgroup of the
+         * same path below SYSTEMD_CGROUP_CONTROLLER_LEGACY is removed as well; a failure to do so is only logged.
+         * Returns 0 on success and a negative errno-style error otherwise. */
+
+        r = cg_get_path(controller, path, NULL, &dir_path);
+        if (r < 0)
+                return r;
+
+        if (rmdir(dir_path) < 0 && errno != ENOENT)
+                return -errno;
+
+        r = cg_hybrid_unified();
+        if (r <= 0)
+                return r;
+
+        if (!streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+                return 0;
+
+        r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
+        if (r < 0)
+                log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
+
+        return 0;
+}
+
+/* Sends signal sig to every process listed in the "cgroup.procs" attribute of the given cgroup; child cgroups are
+ * not visited. The set s records the PIDs that were handled, so that no PID is signalled twice; the caller may pass
+ * its own set to share that record across calls, or NULL to have a temporary one allocated. If log_kill is set it is
+ * called with (pid, sig, userdata) right before each kill().
+ *
+ * Returns 1 if at least one process was signalled successfully, 0 if none was, and a negative errno-style error
+ * otherwise. */
+int cg_kill(
+ const char *controller,
+ const char *path,
+ int sig,
+ CGroupFlags flags,
+ Set *s,
+ cg_kill_log_func_t log_kill,
+ void *userdata) {
+
+ _cleanup_set_free_ Set *allocated_set = NULL;
+ bool done = false;
+ int r, ret = 0;
+ pid_t my_pid;
+
+ assert(sig >= 0);
+
+ /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
+ * SIGCONT on SIGKILL. */
+ if (IN_SET(sig, SIGCONT, SIGKILL))
+ flags &= ~CGROUP_SIGCONT;
+
+ /* This goes through the tasks list and kills them all. This
+ * is repeated until no further processes are added to the
+ * tasks list, to properly handle forking processes */
+
+ if (!s) {
+ s = allocated_set = set_new(NULL);
+ if (!s)
+ return -ENOMEM;
+ }
+
+ my_pid = getpid_cached();
+
+ do {
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid = 0;
+ /* Assume this is the last pass; cleared again as soon as a PID not seen before turns up */
+ done = true;
+
+ r = cg_enumerate_processes(controller, path, &f);
+ if (r < 0) {
+ /* A cgroup that does not exist (anymore) is not an error. An error remembered earlier in ret takes
+ * precedence over this one. */
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_pid(f, &pid)) > 0) {
+
+ if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
+ continue;
+
+ /* Handled in an earlier pass (or by an earlier call sharing the set) */
+ if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
+ continue;
+
+ if (log_kill)
+ log_kill(pid, sig, userdata);
+
+ /* If we haven't killed this process yet, kill
+ * it */
+ if (kill(pid, sig) < 0) {
+ /* Only the first error is remembered; a process that is gone already (ESRCH) is not an error */
+ if (ret >= 0 && errno != ESRCH)
+ ret = -errno;
+ } else {
+ if (flags & CGROUP_SIGCONT)
+ (void) kill(pid, SIGCONT);
+
+ if (ret == 0)
+ ret = 1;
+ }
+
+ done = false;
+
+ r = set_put(s, PID_TO_PTR(pid));
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+ }
+
+ /* cg_read_pid() failed, as opposed to having reached the end of the list */
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+
+ /* To avoid racing against processes which fork
+ * quicker than we can kill them we repeat this until
+ * no new pids need to be killed. */
+
+ } while (!done);
+
+ return ret;
+}
+
+/* Like cg_kill(), but descends into all child cgroups as well, the given cgroup first. One set of handled PIDs is
+ * shared by the whole traversal. With CGROUP_REMOVE in flags each cgroup is removed after its subtree has been
+ * processed. Returns what cg_kill() returns, combined over all cgroups visited; the first error wins. */
+int cg_kill_recursive(
+ const char *controller,
+ const char *path,
+ int sig,
+ CGroupFlags flags,
+ Set *s,
+ cg_kill_log_func_t log_kill,
+ void *userdata) {
+
+ _cleanup_set_free_ Set *allocated_set = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ int r, ret;
+ char *fn;
+
+ assert(path);
+ assert(sig >= 0);
+
+ if (!s) {
+ s = allocated_set = set_new(NULL);
+ if (!s)
+ return -ENOMEM;
+ }
+
+ /* An error here does not end the operation, the children are still visited */
+ ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
+
+ r = cg_enumerate_subgroups(controller, path, &d);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_subgroup(d, &fn)) > 0) {
+ _cleanup_free_ char *p = NULL;
+
+ /* fn belongs to us; it is released right after its last use, before the result of strjoin() is checked */
+ p = strjoin(path, "/", fn);
+ free(fn);
+ if (!p)
+ return -ENOMEM;
+
+ r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
+ if (r != 0 && ret >= 0)
+ ret = r;
+ }
+ /* Here r is the result of the final cg_read_subgroup() call, i.e. 0 or an error */
+ if (ret >= 0 && r < 0)
+ ret = r;
+
+ if (flags & CGROUP_REMOVE) {
+ r = cg_rmdir(controller, path);
+ /* A cgroup that is gone already or still busy is not reported */
+ if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
+ return r;
+ }
+
+ return ret;
+}
+
+/* Moves all processes listed in "cgroup.procs" of the cgroup (cfrom, pfrom) to the cgroup (cto, pto) by calling
+ * cg_attach() for each of them; child cgroups are not visited. The list is re-read until a pass finds no PID that
+ * has not been handled yet.
+ *
+ * Returns 1 if at least one process was attached successfully, 0 if none was, and a negative errno-style error
+ * otherwise. */
+int cg_migrate(
+ const char *cfrom,
+ const char *pfrom,
+ const char *cto,
+ const char *pto,
+ CGroupFlags flags) {
+
+ bool done = false;
+ _cleanup_set_free_ Set *s = NULL;
+ int r, ret = 0;
+ pid_t my_pid;
+
+ assert(cfrom);
+ assert(pfrom);
+ assert(cto);
+ assert(pto);
+
+ /* Records the PIDs handled so far, so that a process that cannot be moved is not retried forever */
+ s = set_new(NULL);
+ if (!s)
+ return -ENOMEM;
+
+ my_pid = getpid_cached();
+
+ do {
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid = 0;
+ /* Assume this is the last pass; cleared again as soon as a PID not seen before turns up */
+ done = true;
+
+ r = cg_enumerate_processes(cfrom, pfrom, &f);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_pid(f, &pid)) > 0) {
+
+ /* This might do weird stuff if we aren't a
+ * single-threaded program. However, we
+ * luckily know we are not */
+ if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
+ continue;
+
+ if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
+ continue;
+
+ /* Ignore kernel threads. Since they can only
+ * exist in the root cgroup, we only check for
+ * them there. */
+ /* (cfrom was asserted to be non-NULL above, so the first operand is always true) */
+ if (cfrom &&
+ empty_or_root(pfrom) &&
+ is_kernel_thread(pid) > 0)
+ continue;
+
+ r = cg_attach(cto, pto, pid);
+ if (r < 0) {
+ /* Only the first error is remembered; a process that is gone already (-ESRCH) is not an error */
+ if (ret >= 0 && r != -ESRCH)
+ ret = r;
+ } else if (ret == 0)
+ ret = 1;
+
+ done = false;
+
+ r = set_put(s, PID_TO_PTR(pid));
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+ }
+
+ /* cg_read_pid() failed, as opposed to having reached the end of the list */
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+ } while (!done);
+
+ return ret;
+}
+
+/* Like cg_migrate(), but descends into the child cgroups of (cfrom, pfrom) as well. The processes of all cgroups
+ * visited end up in the one destination cgroup (cto, pto); the source hierarchy is not recreated there. With
+ * CGROUP_REMOVE in flags each source cgroup is removed after its subtree has been processed. Returns what
+ * cg_migrate() returns, combined over all cgroups visited; the first error wins. */
+int cg_migrate_recursive(
+ const char *cfrom,
+ const char *pfrom,
+ const char *cto,
+ const char *pto,
+ CGroupFlags flags) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ int r, ret = 0;
+ char *fn;
+
+ assert(cfrom);
+ assert(pfrom);
+ assert(cto);
+ assert(pto);
+
+ /* An error here does not end the operation, the children are still visited */
+ ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
+
+ r = cg_enumerate_subgroups(cfrom, pfrom, &d);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_subgroup(d, &fn)) > 0) {
+ _cleanup_free_ char *p = NULL;
+
+ /* fn belongs to us; it is released right after its last use, before the result of strjoin() is checked */
+ p = strjoin(pfrom, "/", fn);
+ free(fn);
+ if (!p)
+ return -ENOMEM;
+
+ r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
+ if (r != 0 && ret >= 0)
+ ret = r;
+ }
+
+ /* Here r is the result of the final cg_read_subgroup() call, i.e. 0 or an error */
+ if (r < 0 && ret >= 0)
+ ret = r;
+
+ if (flags & CGROUP_REMOVE) {
+ r = cg_rmdir(cfrom, pfrom);
+ /* A cgroup that is gone already or still busy is not reported */
+ if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
+ return r;
+ }
+
+ return ret;
+}
+
+/* Calls cg_migrate_recursive() and, if that fails, retries with the prefixes of pto as destination, returning the
+ * result of the first attempt that does not fail. If all attempts fail the error of the very first attempt (the
+ * one with the full pto) is returned.
+ * NOTE(review): the order and the exact set of prefixes tried is determined by PATH_FOREACH_PREFIX — confirm in
+ * path-util.h. */
+int cg_migrate_recursive_fallback(
+ const char *cfrom,
+ const char *pfrom,
+ const char *cto,
+ const char *pto,
+ CGroupFlags flags) {
+
+ int r;
+
+ assert(cfrom);
+ assert(pfrom);
+ assert(cto);
+ assert(pto);
+
+ r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
+ if (r < 0) {
+ /* Buffer on the stack for the prefixes; no prefix is longer than pto itself */
+ char prefix[strlen(pto) + 1];
+
+ /* This didn't work? Then let's try all prefixes of the destination */
+
+ PATH_FOREACH_PREFIX(prefix, pto) {
+ int q;
+
+ q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
+ if (q >= 0)
+ return q;
+ }
+ }
+
+ return r;
+}
+
+ /* Maps a controller name to the name of the directory below /sys/fs/cgroup/ it is mounted at. The
+  * systemd controller is first translated to its hybrid or legacy name, then a leading "name="
+  * (as used for named hierarchies) is dropped if present. The result points into the argument or
+  * into a constant string; nothing is allocated. */
+ static const char *controller_to_dirname(const char *controller) {
+         const char *stripped;
+
+         assert(controller);
+
+         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+                 controller = cg_hybrid_unified() > 0 ?
+                         SYSTEMD_CGROUP_CONTROLLER_HYBRID :
+                         SYSTEMD_CGROUP_CONTROLLER_LEGACY;
+
+         stripped = startswith(controller, "name=");
+
+         return stripped ? stripped : controller;
+ }
+
+ /* Builds "/sys/fs/cgroup/<controller dir>[/<path>][/<suffix>]" for the legacy hierarchy and stores
+  * the newly allocated string in *fs. Empty or NULL 'path' and 'suffix' components are left out.
+  * Returns 0 on success, -ENOMEM on allocation failure. */
+ static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
+         const char *dir;
+         char *joined = NULL;
+
+         assert(fs);
+         assert(controller);
+
+         dir = controller_to_dirname(controller);
+
+         if (isempty(path)) {
+                 if (isempty(suffix))
+                         joined = strappend("/sys/fs/cgroup/", dir);
+                 else
+                         joined = strjoin("/sys/fs/cgroup/", dir, "/", suffix);
+         } else {
+                 if (isempty(suffix))
+                         joined = strjoin("/sys/fs/cgroup/", dir, "/", path);
+                 else
+                         joined = strjoin("/sys/fs/cgroup/", dir, "/", path, "/", suffix);
+         }
+
+         if (!joined)
+                 return -ENOMEM;
+
+         *fs = joined;
+         return 0;
+ }
+
+ /* Builds "/sys/fs/cgroup[/<path>][/<suffix>]" for the unified hierarchy and stores the newly
+  * allocated string in *fs. Empty or NULL components are left out. Returns 0 on success, -ENOMEM
+  * on allocation failure. */
+ static int join_path_unified(const char *path, const char *suffix, char **fs) {
+         char *joined;
+
+         assert(fs);
+
+         if (isempty(path)) {
+                 if (isempty(suffix))
+                         joined = strdup("/sys/fs/cgroup");
+                 else
+                         joined = strappend("/sys/fs/cgroup/", suffix);
+         } else {
+                 if (isempty(suffix))
+                         joined = strappend("/sys/fs/cgroup/", path);
+                 else
+                         joined = strjoin("/sys/fs/cgroup/", path, "/", suffix);
+         }
+
+         if (!joined)
+                 return -ENOMEM;
+
+         *fs = joined;
+         return 0;
+ }
+
+ /* Computes the file system path for the given controller, cgroup path and attribute suffix, and
+  * stores the newly allocated, simplified result in *fs.
+  *
+  * With a NULL controller only 'path' and 'suffix' are combined, i.e. the result is the location
+  * *below* the controller mount point, without any prefix; at least one of the two must be given.
+  * Returns 0 on success, -EINVAL on invalid arguments, -ENOMEM or another negative error code. */
+ int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
+         int r;
+
+         assert(fs);
+
+         if (!controller) {
+                 char *joined;
+
+                 if (!path && !suffix)
+                         return -EINVAL;
+
+                 if (path && suffix)
+                         joined = strjoin(path, "/", suffix);
+                 else
+                         joined = strdup(path ? path : suffix);
+                 if (!joined)
+                         return -ENOMEM;
+
+                 *fs = path_simplify(joined, false);
+                 return 0;
+         }
+
+         if (!cg_controller_is_valid(controller))
+                 return -EINVAL;
+
+         r = cg_all_unified();
+         if (r < 0)
+                 return r;
+
+         if (r == 0)
+                 r = join_path_legacy(controller, path, suffix, fs);
+         else
+                 r = join_path_unified(path, suffix, fs);
+         if (r < 0)
+                 return r;
+
+         path_simplify(*fs, false);
+         return 0;
+ }
+
+ /* Checks whether a specific controller is accessible, i.e. whether its hierarchy is mounted.
+  *
+  * On the unified hierarchy every controller counts as accessible, except named hierarchies
+  * ("name=..."), which yield -EOPNOTSUPP. Otherwise the controller's directory below
+  * /sys/fs/cgroup/ has to exist. Returns 0 if accessible, a negative error code if not. */
+ static int controller_is_accessible(const char *controller) {
+         int unified;
+
+         assert(controller);
+
+         if (!cg_controller_is_valid(controller))
+                 return -EINVAL;
+
+         unified = cg_all_unified();
+         if (unified < 0)
+                 return unified;
+
+         if (unified == 0) {
+                 const char *mount_dir, *dir_name;
+
+                 dir_name = controller_to_dirname(controller);
+                 mount_dir = strjoina("/sys/fs/cgroup/", dir_name);
+
+                 if (laccess(mount_dir, F_OK) < 0)
+                         return -errno;
+
+                 return 0;
+         }
+
+         /* The systemd controller itself is always fine on the unified hierarchy */
+         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+                 return 0;
+
+         /* We don't support named hierarchies if we are using the unified hierarchy. */
+         if (startswith(controller, "name="))
+                 return -EOPNOTSUPP;
+
+         return 0;
+ }
+
+ /* Same as cg_get_path(), but first verifies that the controller is actually accessible and
+  * returns the error of that check if it is not. */
+ int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
+         int accessible;
+
+         assert(controller);
+         assert(fs);
+
+         accessible = controller_is_accessible(controller);
+
+         return accessible < 0 ? accessible : cg_get_path(controller, path, suffix, fs);
+ }
+
+ /* nftw() callback used by cg_trim(): tries to remove every directory reported after its contents
+  * (FTW_DP), except the top-level one (level 0). Failures of rmdir() are deliberately ignored and
+  * the walk always continues. */
+ static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
+         assert(path);
+         assert(sb);
+         assert(ftwbuf);
+
+         if (typeflag == FTW_DP && ftwbuf->level >= 1)
+                 (void) rmdir(path);
+
+         return 0;
+ }
+
+ /* Removes all (empty) sub-cgroups below the specified cgroup, and, if 'delete_root' is true, the
+  * cgroup itself too. The tree is walked with nftw() in post-order (FTW_DEPTH), without crossing
+  * mount points (FTW_MOUNT) and without following symlinks (FTW_PHYS).
+  *
+  * In hybrid mode, trimming the systemd controller also trims the legacy compat hierarchy; a
+  * failure there is only logged.
+  *
+  * Returns 0 on success (a missing tree counts as success), a negative error code otherwise. */
+ int cg_trim(const char *controller, const char *path, bool delete_root) {
+         _cleanup_free_ char *fs = NULL;
+         int r = 0, q;
+
+         assert(path);
+
+         r = cg_get_path(controller, path, NULL, &fs);
+         if (r < 0)
+                 return r;
+
+         /* nftw() does not reliably set errno on every failure, hence reset it first and fall back
+          * to -EIO if it is still unset afterwards. */
+         errno = 0;
+         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
+                 if (errno == ENOENT)
+                         r = 0;
+                 else if (errno > 0)
+                         r = -errno;
+                 else
+                         r = -EIO;
+         }
+
+         if (delete_root) {
+                 if (rmdir(fs) < 0 && errno != ENOENT)
+                         return -errno;
+         }
+
+         q = cg_hybrid_unified();
+         if (q < 0)
+                 return q;
+
+         /* cg_get_path() above accepts a NULL controller, hence don't hand NULL to streq() here */
+         if (q > 0 && controller && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+                 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
+                 if (q < 0)
+                         log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
+         }
+
+         return r;
+ }
+
+ /* Creates a cgroup (including any missing parents) in the hierarchy of the given controller. In
+  * hybrid mode, creating a cgroup for the systemd controller also creates it in the legacy compat
+  * hierarchy; a failure there is only logged.
+  *
+  * Returns 0 if the group already existed, 1 if it was created, negative on error.
+  */
+ int cg_create(const char *controller, const char *path) {
+         _cleanup_free_ char *dir = NULL;
+         int q;
+
+         q = cg_get_path_and_check(controller, path, NULL, &dir);
+         if (q < 0)
+                 return q;
+
+         q = mkdir_parents(dir, 0755);
+         if (q < 0)
+                 return q;
+
+         q = mkdir_errno_wrapper(dir, 0755);
+         if (q < 0)
+                 return q == -EEXIST ? 0 : q;
+
+         q = cg_hybrid_unified();
+         if (q < 0)
+                 return q;
+         if (q == 0 || !streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+                 return 1;
+
+         q = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
+         if (q < 0)
+                 log_warning_errno(q, "Failed to create compat systemd cgroup %s: %m", path);
+
+         return 1;
+ }
+
+ /* Creates the cgroup and moves 'pid' into it. Returns what cg_create() returned (0 if the group
+  * already existed, 1 if it was created) or a negative error code. Note that the cgroup is not
+  * removed again if attaching fails. */
+ int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
+         int created, attached;
+
+         assert(pid >= 0);
+
+         created = cg_create(controller, path);
+         if (created < 0)
+                 return created;
+
+         attached = cg_attach(controller, path, pid);
+
+         return attached < 0 ? attached : created;
+ }
+
+ /* Moves process 'pid' into the specified cgroup by writing its PID to the cgroup's
+  * "cgroup.procs" attribute. A 'pid' of 0 refers to the calling process. In hybrid mode, attaching
+  * to the systemd controller also attaches in the legacy compat hierarchy; a failure there is only
+  * logged. Returns 0 on success, a negative error code otherwise. */
+ int cg_attach(const char *controller, const char *path, pid_t pid) {
+         _cleanup_free_ char *procs = NULL;
+         char line[DECIMAL_STR_MAX(pid_t) + 2];
+         int q;
+
+         assert(path);
+         assert(pid >= 0);
+
+         q = cg_get_path_and_check(controller, path, "cgroup.procs", &procs);
+         if (q < 0)
+                 return q;
+
+         if (pid == 0)
+                 pid = getpid_cached();
+
+         xsprintf(line, PID_FMT "\n", pid);
+
+         q = write_string_file(procs, line, WRITE_STRING_FILE_DISABLE_BUFFER);
+         if (q < 0)
+                 return q;
+
+         q = cg_hybrid_unified();
+         if (q < 0)
+                 return q;
+         if (q == 0 || !streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+                 return 0;
+
+         q = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
+         if (q < 0)
+                 log_warning_errno(q, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
+
+         return 0;
+ }
+
+ /* Like cg_attach(), but if attaching to 'path' fails, retries with each prefix of 'path' and
+  * returns the result of the first attempt that succeeds. If none does, the error of the initial
+  * attempt is returned. */
+ int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
+         int first;
+
+         assert(controller);
+         assert(path);
+         assert(pid >= 0);
+
+         first = cg_attach(controller, path, pid);
+         if (first >= 0)
+                 return first;
+
+         {
+                 /* Scratch buffer PATH_FOREACH_PREFIX() writes each candidate prefix into */
+                 char prefix[strlen(path) + 1];
+
+                 PATH_FOREACH_PREFIX(prefix, path) {
+                         int attempt;
+
+                         attempt = cg_attach(controller, prefix, pid);
+                         if (attempt >= 0)
+                                 return attempt;
+                 }
+         }
+
+         return first;
+ }
+
+ /* Hands ownership of a cgroup to uid/gid: the cgroup directory is set to mode 0755 and a fixed
+  * list of attribute files (which one depends on whether the controller is on the unified
+  * hierarchy) to mode 0644, all owned by uid/gid. Failure on an attribute marked fatal aborts with
+  * that error, failure on the others is only logged. Nothing is done if neither uid nor gid is
+  * valid. In hybrid mode the same is applied to the legacy compat hierarchy, ignoring failures.
+  * Returns 0 on success, a negative error code otherwise. */
+ int cg_set_access(
+                 const char *controller,
+                 const char *path,
+                 uid_t uid,
+                 gid_t gid) {
+
+         struct Attribute {
+                 const char *name;
+                 bool fatal;
+         };
+
+         /* cgroup v1, aka legacy/non-unified */
+         static const struct Attribute legacy_attributes[] = {
+                 { "cgroup.procs",           true  },
+                 { "tasks",                  false },
+                 { "cgroup.clone_children",  false },
+                 {},
+         };
+
+         /* cgroup v2, aka unified */
+         static const struct Attribute unified_attributes[] = {
+                 { "cgroup.procs",           true  },
+                 { "cgroup.subtree_control", true  },
+                 { "cgroup.threads",         false },
+                 {},
+         };
+
+         /* Indexed by the (boolean) result of cg_unified_controller() */
+         static const struct Attribute* const attributes[] = {
+                 [false] = legacy_attributes,
+                 [true]  = unified_attributes,
+         };
+
+         _cleanup_free_ char *fs = NULL;
+         const struct Attribute *attr;
+         int r, unified;
+
+         assert(path);
+
+         if (uid == UID_INVALID && gid == GID_INVALID)
+                 return 0;
+
+         unified = cg_unified_controller(controller);
+         if (unified < 0)
+                 return unified;
+
+         /* First the cgroup directory itself */
+         r = cg_get_path(controller, path, NULL, &fs);
+         if (r < 0)
+                 return r;
+
+         r = chmod_and_chown(fs, 0755, uid, gid);
+         if (r < 0)
+                 return r;
+
+         /* Then each attribute file, up to the terminating all-zero table entry */
+         for (attr = attributes[unified]; attr->name; attr++) {
+                 fs = mfree(fs);
+
+                 r = cg_get_path(controller, path, attr->name, &fs);
+                 if (r < 0)
+                         return r;
+
+                 r = chmod_and_chown(fs, 0644, uid, gid);
+                 if (r >= 0)
+                         continue;
+
+                 if (attr->fatal)
+                         return r;
+
+                 log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
+         }
+
+         if (!streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+                 return 0;
+
+         r = cg_hybrid_unified();
+         if (r < 0)
+                 return r;
+         if (r > 0) {
+                 /* Always propagate access mode from unified to legacy controller */
+                 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
+                 if (r < 0)
+                         log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
+         }
+
+         return 0;
+ }
+
+ /* Sets the extended attribute 'name' on the directory of the specified cgroup; 'value', 'size'
+  * and 'flags' are passed to setxattr() unchanged. Returns 0 on success, a negative errno-style
+  * error code otherwise. */
+ int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
+         _cleanup_free_ char *dir = NULL;
+         int q;
+
+         assert(path);
+         assert(name);
+         assert(value || size <= 0);
+
+         q = cg_get_path(controller, path, NULL, &dir);
+         if (q < 0)
+                 return q;
+
+         return setxattr(dir, name, value, size, flags) < 0 ? -errno : 0;
+ }
+
+ /* Reads the extended attribute 'name' from the directory of the specified cgroup into 'value'
+  * (at most 'size' bytes). Returns what getxattr() returned, converted to int, on success, or a
+  * negative errno-style error code. */
+ int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
+         _cleanup_free_ char *dir = NULL;
+         ssize_t length;
+         int q;
+
+         assert(path);
+         assert(name);
+
+         q = cg_get_path(controller, path, NULL, &dir);
+         if (q < 0)
+                 return q;
+
+         length = getxattr(dir, name, value, size);
+
+         return length < 0 ? -errno : (int) length;
+ }
+
+ int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *fs, *controller_str;
+ int unified, r;
+ size_t cs = 0;
+ /* Determines the cgroup path of process 'pid' in the hierarchy of 'controller' by scanning the
+  * process' "cgroup" file in procfs line by line. A NULL controller selects
+  * SYSTEMD_CGROUP_CONTROLLER.
+  *
+  * On success returns 0 and stores a newly allocated string in *path. Returns -EINVAL for an
+  * invalid controller name, -ESRCH if the file does not exist (ENOENT from fopen()), -ENODATA if
+  * no line matched, -ENOMEM, or another negative error code. */
+ assert(path);
+ assert(pid >= 0);
+
+ if (controller) {
+ if (!cg_controller_is_valid(controller))
+ return -EINVAL;
+ } else
+ controller = SYSTEMD_CGROUP_CONTROLLER;
+
+ unified = cg_unified_controller(controller);
+ if (unified < 0)
+ return unified;
+ if (unified == 0) { /* legacy hierarchy: we will have to match the controller name per line */
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+ controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
+ else
+ controller_str = controller;
+
+ cs = strlen(controller_str); /* cached, the comparison below runs once per word */
+ }
+
+ fs = procfs_file_alloca(pid, "cgroup");
+ f = fopen(fs, "re");
+ if (!f)
+ return errno == ENOENT ? -ESRCH : -errno;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ /* Each line is expected to have three colon-separated fields; the path is the last one. */
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *e, *p;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0) /* end of file, nothing matched */
+ break;
+
+ if (unified) {
+ e = startswith(line, "0:"); /* only the line starting with "0:" is of interest here */
+ if (!e)
+ continue;
+
+ e = strchr(e, ':'); /* skip the second field, leaving 'e' at the colon before the path */
+ if (!e)
+ continue;
+ } else {
+ char *l;
+ size_t k;
+ const char *word, *state;
+ bool found = false;
+
+ l = strchr(line, ':'); /* end of the first field */
+ if (!l)
+ continue;
+
+ l++;
+ e = strchr(l, ':'); /* end of the second field, a comma-separated list of names */
+ if (!e)
+ continue;
+
+ *e = 0; /* cut the line here so that 'l' is just the name list */
+ FOREACH_WORD_SEPARATOR(word, k, l, ",", state)
+ if (k == cs && memcmp(word, controller_str, cs) == 0) {
+ found = true;
+ break;
+ }
+ if (!found)
+ continue;
+ }
+
+ p = strdup(e + 1); /* everything after the second colon is the cgroup path */
+ if (!p)
+ return -ENOMEM;
+
+ /* Truncate suffix indicating the process is a zombie */
+ e = endswith(p, " (deleted)");
+ if (e)
+ *e = 0;
+
+ *path = p;
+ return 0;
+ }
+
+ return -ENODATA;
+ }
+
+ /* Installs 'agent' as release agent of the (legacy) hierarchy of 'controller' and makes sure
+  * "notify_on_release" is enabled at its root.
+  *
+  * Returns 1 if "notify_on_release" had to be switched on, 0 if it already was, -EEXIST if a
+  * different release agent is already configured, -EIO if "notify_on_release" contains something
+  * unexpected, -EOPNOTSUPP on the unified hierarchy, or another negative error code. */
+ int cg_install_release_agent(const char *controller, const char *agent) {
+         _cleanup_free_ char *fs = NULL, *contents = NULL;
+         const char *current;
+         int r;
+
+         assert(agent);
+
+         r = cg_unified_controller(controller);
+         if (r < 0)
+                 return r;
+         if (r > 0) /* doesn't apply to unified hierarchy */
+                 return -EOPNOTSUPP;
+
+         r = cg_get_path(controller, NULL, "release_agent", &fs);
+         if (r < 0)
+                 return r;
+
+         r = read_one_line_file(fs, &contents);
+         if (r < 0)
+                 return r;
+
+         current = strstrip(contents);
+         if (!isempty(current)) {
+                 /* Somebody already installed an agent; that's fine only if it is ours */
+                 if (!path_equal(current, agent))
+                         return -EEXIST;
+         } else {
+                 r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
+                 if (r < 0)
+                         return r;
+         }
+
+         fs = mfree(fs);
+         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
+         if (r < 0)
+                 return r;
+
+         contents = mfree(contents);
+         r = read_one_line_file(fs, &contents);
+         if (r < 0)
+                 return r;
+
+         current = strstrip(contents);
+         if (streq(current, "1"))
+                 return 0;
+         if (!streq(current, "0"))
+                 return -EIO;
+
+         r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
+         if (r < 0)
+                 return r;
+
+         return 1;
+ }
+
+ /* Reverts cg_install_release_agent(): writes "0" to "notify_on_release" and then clears
+  * "release_agent" of the (legacy) hierarchy of 'controller'. Returns 0 on success, -EOPNOTSUPP
+  * on the unified hierarchy, or another negative error code. */
+ int cg_uninstall_release_agent(const char *controller) {
+         _cleanup_free_ char *attr = NULL;
+         int q;
+
+         q = cg_unified_controller(controller);
+         if (q < 0)
+                 return q;
+         if (q > 0) /* Doesn't apply to unified hierarchy */
+                 return -EOPNOTSUPP;
+
+         /* Turn off notification first ... */
+         q = cg_get_path(controller, NULL, "notify_on_release", &attr);
+         if (q < 0)
+                 return q;
+
+         q = write_string_file(attr, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
+         if (q < 0)
+                 return q;
+
+         attr = mfree(attr);
+
+         /* ... then drop the agent itself */
+         q = cg_get_path(controller, NULL, "release_agent", &attr);
+         if (q < 0)
+                 return q;
+
+         q = write_string_file(attr, "", WRITE_STRING_FILE_DISABLE_BUFFER);
+
+         return q < 0 ? q : 0;
+ }
+
+ /* Checks whether the specified cgroup itself (not its children) contains no processes. Returns
+  * > 0 if it is empty or does not exist, 0 if at least one PID could be read from it, negative on
+  * error. */
+ int cg_is_empty(const char *controller, const char *path) {
+         _cleanup_fclose_ FILE *procs = NULL;
+         pid_t first;
+         int q;
+
+         assert(path);
+
+         q = cg_enumerate_processes(controller, path, &procs);
+         if (q < 0)
+                 return q == -ENOENT ? true : q;
+
+         /* One successful read is enough to know the cgroup is populated */
+         q = cg_read_pid(procs, &first);
+
+         return q < 0 ? q : q == 0;
+ }
+
+ /* Checks whether the specified cgroup and all cgroups below it contain no processes. Returns > 0
+  * if so (also if the cgroup does not exist), 0 if something is populated, negative on error.
+  *
+  * On the unified hierarchy this reads the "populated" field of the cgroup's events; otherwise
+  * the tree is walked manually. */
+ int cg_is_empty_recursive(const char *controller, const char *path) {
+         _cleanup_closedir_ DIR *subgroups = NULL;
+         char *name;
+         int r;
+
+         assert(path);
+
+         /* The root cgroup is always populated */
+         if (controller && empty_or_root(path))
+                 return false;
+
+         r = cg_unified_controller(controller);
+         if (r < 0)
+                 return r;
+         if (r > 0) {
+                 _cleanup_free_ char *populated = NULL;
+
+                 r = cg_read_event(controller, path, "populated", &populated);
+                 if (r == -ENOENT)
+                         return true;
+                 if (r < 0)
+                         return r;
+
+                 return streq(populated, "0");
+         }
+
+         /* Legacy: check this cgroup first, then descend into each child */
+         r = cg_is_empty(controller, path);
+         if (r <= 0)
+                 return r;
+
+         r = cg_enumerate_subgroups(controller, path, &subgroups);
+         if (r == -ENOENT)
+                 return true;
+         if (r < 0)
+                 return r;
+
+         for (;;) {
+                 _cleanup_free_ char *child = NULL;
+
+                 r = cg_read_subgroup(subgroups, &name);
+                 if (r <= 0)
+                         break;
+
+                 child = strjoin(path, "/", name);
+                 free(name);
+                 if (!child)
+                         return -ENOMEM;
+
+                 r = cg_is_empty_recursive(controller, child);
+                 if (r <= 0)
+                         return r;
+         }
+         if (r < 0)
+                 return r;
+
+         return true;
+ }
+
+ /* Splits a cgroup specification into controller and path. Three forms are understood:
+  *
+  *   "/some/path"            → no controller, the path
+  *   "controller"            → the controller, no path
+  *   "controller:/some/path" → both (the path may be empty, in which case it is returned as NULL)
+  *
+  * Either output pointer may be NULL if the caller is not interested; results are newly allocated.
+  * Returns 0 on success, -EINVAL if the spec is malformed, -ENOMEM on allocation failure. */
+ int cg_split_spec(const char *spec, char **controller, char **path) {
+         char *ctrl = NULL, *cgpath = NULL;
+         const char *colon;
+
+         assert(spec);
+
+         /* Form 1: a plain absolute path */
+         if (spec[0] == '/') {
+                 if (!path_is_normalized(spec))
+                         return -EINVAL;
+
+                 if (path) {
+                         cgpath = strdup(spec);
+                         if (!cgpath)
+                                 return -ENOMEM;
+
+                         *path = path_simplify(cgpath, false);
+                 }
+
+                 if (controller)
+                         *controller = NULL;
+
+                 return 0;
+         }
+
+         colon = strchr(spec, ':');
+
+         /* Form 2: a plain controller name */
+         if (!colon) {
+                 if (!cg_controller_is_valid(spec))
+                         return -EINVAL;
+
+                 if (controller) {
+                         ctrl = strdup(spec);
+                         if (!ctrl)
+                                 return -ENOMEM;
+
+                         *controller = ctrl;
+                 }
+
+                 if (path)
+                         *path = NULL;
+
+                 return 0;
+         }
+
+         /* Form 3: controller and path, separated by a colon */
+         ctrl = strndup(spec, colon - spec);
+         if (!ctrl)
+                 return -ENOMEM;
+         if (!cg_controller_is_valid(ctrl)) {
+                 free(ctrl);
+                 return -EINVAL;
+         }
+
+         if (!isempty(colon + 1)) {
+                 cgpath = strdup(colon + 1);
+                 if (!cgpath) {
+                         free(ctrl);
+                         return -ENOMEM;
+                 }
+
+                 if (!(path_is_normalized(cgpath) && path_is_absolute(cgpath))) {
+                         free(ctrl);
+                         free(cgpath);
+                         return -EINVAL;
+                 }
+
+                 path_simplify(cgpath, false);
+         }
+
+         /* Hand out what the caller asked for, release the rest */
+         if (!controller)
+                 free(ctrl);
+         else
+                 *controller = ctrl;
+
+         if (!path)
+                 free(cgpath);
+         else
+                 *path = cgpath;
+
+         return 0;
+ }
+
+ /* Turns either a file system path below /sys/fs/cgroup or a cgroup spec (see cg_split_spec())
+  * into a simplified file system path, stored newly allocated in *result. For a spec, a missing
+  * controller defaults to the systemd controller and a missing path to "/". Returns 0 on
+  * success, a negative error code otherwise. */
+ int cg_mangle_path(const char *path, char **result) {
+         _cleanup_free_ char *controller = NULL, *cgpath = NULL;
+         char *copy;
+         int r;
+
+         assert(path);
+         assert(result);
+
+         if (!path_startswith(path, "/sys/fs/cgroup")) {
+                 /* Not a file system path, hence treat it as cgroup spec */
+                 r = cg_split_spec(path, &controller, &cgpath);
+                 if (r < 0)
+                         return r;
+
+                 return cg_get_path(controller ?: SYSTEMD_CGROUP_CONTROLLER, cgpath ?: "/", NULL, result);
+         }
+
+         /* Already a file system path, just clean it up */
+         copy = strdup(path);
+         if (!copy)
+                 return -ENOMEM;
+
+         *result = path_simplify(copy, false);
+         return 0;
+ }
+
+ /* Determines the root cgroup path we are running in: the systemd-hierarchy cgroup of PID 1, with
+  * a trailing init scope, system slice or "/system" component (tried in that order) chopped off.
+  * Stores a newly allocated string in *path and returns 0, or a negative error code. */
+ int cg_get_root_path(char **path) {
+         static const char * const suffixes[] = {
+                 "/" SPECIAL_INIT_SCOPE,
+                 "/" SPECIAL_SYSTEM_SLICE, /* legacy */
+                 "/system",                /* even more legacy */
+         };
+         char *root;
+         size_t i;
+         int r;
+
+         assert(path);
+
+         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &root);
+         if (r < 0)
+                 return r;
+
+         /* Only the first suffix that matches is removed */
+         for (i = 0; i < sizeof(suffixes) / sizeof(suffixes[0]); i++) {
+                 char *tail;
+
+                 tail = endswith(root, suffixes[i]);
+                 if (tail) {
+                         *tail = 0;
+                         break;
+                 }
+         }
+
+         *path = root;
+         return 0;
+ }
+
+ /* Strips the root cgroup prefix from 'cgroup'. If 'root' is NULL it is determined via
+  * cg_get_root_path(). *shifted is set to a pointer into 'cgroup' (nothing is allocated): either
+  * to the slash preceding the remainder after the prefix, or to 'cgroup' itself if the prefix
+  * does not apply. Returns 0 on success, a negative error code otherwise. */
+ int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
+         _cleanup_free_ char *detected = NULL;
+         char *rest;
+         int r;
+
+         assert(cgroup);
+         assert(shifted);
+
+         if (!root) {
+                 /* No root passed in by the caller, hence derive it from PID 1 */
+                 r = cg_get_root_path(&detected);
+                 if (r < 0)
+                         return r;
+
+                 root = detected;
+         }
+
+         rest = path_startswith(cgroup, root);
+
+         /* Step back by one so that the result keeps its leading slash */
+         *shifted = (rest && rest > cgroup) ? rest - 1 : cgroup;
+
+         return 0;
+ }
+
+ /* Returns the systemd-hierarchy cgroup path of 'pid' with the root prefix removed (see
+  * cg_shift_path()). Stores a newly allocated string in *cgroup and returns 0, or a negative
+  * error code. */
+ int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
+         _cleanup_free_ char *full = NULL;
+         const char *shifted;
+         char *copy;
+         int r;
+
+         assert(pid >= 0);
+         assert(cgroup);
+
+         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &full);
+         if (r < 0)
+                 return r;
+
+         r = cg_shift_path(full, root, &shifted);
+         if (r < 0)
+                 return r;
+
+         if (shifted != full) {
+                 /* The result points into the middle of 'full', hence a copy is needed */
+                 copy = strdup(shifted);
+                 if (!copy)
+                         return -ENOMEM;
+
+                 *cgroup = copy;
+                 return 0;
+         }
+
+         /* Nothing was stripped, pass on the original string */
+         *cgroup = TAKE_PTR(full);
+         return 0;
+ }
+
+ /* Interprets the first component of 'cgroup' (everything up to the first slash) as an escaped
+  * unit name. Stores a newly allocated copy of the unescaped name in *unit and returns 0. Returns
+  * -ENXIO if the component is shorter than three characters or is not a valid plain or instance
+  * unit name, -ENOMEM on allocation failure. */
+ int cg_path_decode_unit(const char *cgroup, char **unit) {
+         char *component, *name, *copy;
+         size_t length;
+
+         assert(cgroup);
+         assert(unit);
+
+         length = strcspn(cgroup, "/");
+         if (length < 3)
+                 return -ENXIO;
+
+         component = strndupa(cgroup, length);
+         name = cg_unescape(component);
+
+         if (!unit_name_is_valid(name, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
+                 return -ENXIO;
+
+         copy = strdup(name);
+         if (!copy)
+                 return -ENOMEM;
+
+         *unit = copy;
+         return 0;
+ }
+
+ /* Checks whether the first 'n' bytes of 'p' form an (escaped) valid slice unit name, i.e. end in
+  * ".slice" and are a valid plain unit name once unescaped. */
+ static bool valid_slice_name(const char *p, size_t n) {
+
+         if (!p || n < STRLEN("x.slice"))
+                 return false;
+
+         if (memcmp(p + n - 6, ".slice", 6) != 0)
+                 return false;
+
+         {
+                 /* NUL-terminated copy, since 'p' continues after the component */
+                 char copy[n+1];
+
+                 memcpy(copy, p, n);
+                 copy[n] = 0;
+
+                 return unit_name_is_valid(cg_unescape(copy), UNIT_NAME_PLAIN);
+         }
+ }
+
+ /* Skips over all leading slice components of the path. Returns a pointer to the first component
+  * that is not a valid slice name (with the slashes before it skipped), or to the terminating
+  * NUL. */
+ static const char *skip_slices(const char *p) {
+         size_t length;
+
+         assert(p);
+
+         for (;;) {
+                 p += strspn(p, "/");
+
+                 length = strcspn(p, "/");
+                 if (!valid_slice_name(p, length))
+                         break;
+
+                 p += length;
+         }
+
+         return p;
+ }
+
+ /* Extracts the unit a (shifted) cgroup path belongs to: all leading slices are skipped and the
+  * next component is decoded as unit name. Stores a newly allocated string in *ret and returns 0;
+  * returns -ENXIO if there is no valid unit there, or another negative error code. */
+ int cg_path_get_unit(const char *path, char **ret) {
+         const char *after_slices;
+         char *name;
+         int r;
+
+         assert(path);
+         assert(ret);
+
+         after_slices = skip_slices(path);
+
+         r = cg_path_decode_unit(after_slices, &name);
+         if (r < 0)
+                 return r;
+
+         /* We skipped over the slices, don't accept any now */
+         if (!endswith(name, ".slice")) {
+                 *ret = name;
+                 return 0;
+         }
+
+         free(name);
+         return -ENXIO;
+ }
+
+ /* Determines the unit that process 'pid' belongs to, based on its shifted cgroup path. See
+  * cg_path_get_unit() for the result. */
+ int cg_pid_get_unit(pid_t pid, char **unit) {
+         _cleanup_free_ char *shifted = NULL;
+         int q;
+
+         assert(unit);
+
+         q = cg_pid_get_path_shifted(pid, NULL, &shifted);
+
+         return q < 0 ? q : cg_path_get_unit(shifted, unit);
+ }
+
+/**
+ * Skip session-*.scope, but require it to be there.
+ */
+static const char *skip_session(const char *p) {
+ size_t n;
+
+ if (isempty(p))
+ return NULL;
+
+ p += strspn(p, "/");
+
+ n = strcspn(p, "/");
+ if (n < STRLEN("session-x.scope"))
+ return NULL;
+
+ if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
+ char buf[n - 8 - 6 + 1];
+
+ memcpy(buf, p + 8, n - 8 - 6);
+ buf[n - 8 - 6] = 0;
+
+ /* Note that session scopes never need unescaping,
+ * since they cannot conflict with the kernel's own
+ * names, hence we don't need to call cg_unescape()
+ * here. */
+
+ if (!session_id_valid(buf))
+ return false;
+
+ p += n;
+ p += strspn(p, "/");
+ return p;
+ }
+
+ return NULL;
+}
+
+/**
+ * Skip user@*.service, but require it to be there.
+ */
+static const char *skip_user_manager(const char *p) {
+ size_t n;
+
+ if (isempty(p))
+ return NULL;
+
+ p += strspn(p, "/");
+
+ n = strcspn(p, "/");
+ if (n < STRLEN("user@x.service"))
+ return NULL;
+
+ if (memcmp(p, "user@", 5) == 0 && memcmp(p + n - 8, ".service", 8) == 0) {
+ char buf[n - 5 - 8 + 1];
+
+ memcpy(buf, p + 5, n - 5 - 8);
+ buf[n - 5 - 8] = 0;
+
+ /* Note that user manager services never need unescaping,
+ * since they cannot conflict with the kernel's own
+ * names, hence we don't need to call cg_unescape()
+ * here. */
+
+ if (parse_uid(buf, NULL) < 0)
+ return NULL;
+
+ p += n;
+ p += strspn(p, "/");
+
+ return p;
+ }
+
+ return NULL;
+}
+
+ /* Skips the part of a cgroup path that leads to a user's own units: any leading slices, followed
+  * by either the user manager service or, failing that, a session scope. Returns a pointer to
+  * what follows, or NULL if neither is there. */
+ static const char *skip_user_prefix(const char *path) {
+         const char *after_slices, *after_manager;
+
+         assert(path);
+
+         after_slices = skip_slices(path);
+
+         /* Prefer the user manager if it is next in the path, otherwise try a session scope */
+         after_manager = skip_user_manager(after_slices);
+
+         return after_manager ? after_manager : skip_session(after_slices);
+ }
+
+ /* Extracts the user unit a cgroup path belongs to. Returns -ENXIO if the path does not lead
+  * through a user manager or session scope; what follows is parsed exactly like a system unit
+  * path, see cg_path_get_unit(). */
+ int cg_path_get_user_unit(const char *path, char **ret) {
+         const char *rest;
+
+         assert(path);
+         assert(ret);
+
+         rest = skip_user_prefix(path);
+
+         return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
+ }
+
+ /* Determines the user unit that process 'pid' belongs to, based on its shifted cgroup path. See
+  * cg_path_get_user_unit() for the result. */
+ int cg_pid_get_user_unit(pid_t pid, char **unit) {
+         _cleanup_free_ char *shifted = NULL;
+         int q;
+
+         assert(unit);
+
+         q = cg_pid_get_path_shifted(pid, NULL, &shifted);
+         if (q >= 0)
+                 q = cg_path_get_user_unit(shifted, unit);
+
+         return q;
+ }
+
+ /* Determines the machine a cgroup path belongs to: the unit is extracted from the path and the
+  * symlink "/run/systemd/machines/unit:<unit>" is read. Returns the result of readlink_malloc(),
+  * or a negative error code if no unit could be extracted. */
+ int cg_path_get_machine_name(const char *path, char **machine) {
+         _cleanup_free_ char *unit = NULL;
+         const char *link;
+         int q;
+
+         q = cg_path_get_unit(path, &unit);
+         if (q < 0)
+                 return q;
+
+         link = strjoina("/run/systemd/machines/unit:", unit);
+
+         return readlink_malloc(link, machine);
+ }
+
+ /* Determines the machine that process 'pid' belongs to, based on its shifted cgroup path. See
+  * cg_path_get_machine_name() for the result. */
+ int cg_pid_get_machine_name(pid_t pid, char **machine) {
+         _cleanup_free_ char *shifted = NULL;
+         int q;
+
+         assert(machine);
+
+         q = cg_pid_get_path_shifted(pid, NULL, &shifted);
+
+         return q < 0 ? q : cg_path_get_machine_name(shifted, machine);
+ }
+
+ /* Extracts the session id from a cgroup path whose unit is "session-<id>.scope". 'session' may be
+  * NULL if the caller only wants to validate; otherwise a newly allocated copy of the id is
+  * stored there. Returns 0 on success, -ENXIO if the unit is not a session scope with a valid id,
+  * or another negative error code. */
+ int cg_path_get_session(const char *path, char **session) {
+         _cleanup_free_ char *unit = NULL;
+         char *id, *suffix, *copy;
+         int r;
+
+         assert(path);
+
+         r = cg_path_get_unit(path, &unit);
+         if (r < 0)
+                 return r;
+
+         id = startswith(unit, "session-");
+         if (!id)
+                 return -ENXIO;
+
+         suffix = endswith(id, ".scope");
+         if (!suffix)
+                 return -ENXIO;
+
+         /* Cut off the suffix in place, so that 'id' is just the session id */
+         *suffix = 0;
+         if (!session_id_valid(id))
+                 return -ENXIO;
+
+         if (!session)
+                 return 0;
+
+         copy = strdup(id);
+         if (!copy)
+                 return -ENOMEM;
+
+         *session = copy;
+         return 0;
+ }
+
+ /* Determines the session that process 'pid' belongs to, based on its shifted cgroup path. See
+  * cg_path_get_session() for the result. */
+ int cg_pid_get_session(pid_t pid, char **session) {
+         _cleanup_free_ char *shifted = NULL;
+         int q;
+
+         q = cg_pid_get_path_shifted(pid, NULL, &shifted);
+         if (q >= 0)
+                 q = cg_path_get_session(shifted, session);
+
+         return q;
+ }
+
+ /* Determines the owning user of a cgroup path: its slice must be "user-<uid>.slice", and the UID
+  * is parsed from it into *uid. Returns 0 on success, -ENXIO if the slice is not a user slice
+  * with a parsable UID, or another negative error code. */
+ int cg_path_get_owner_uid(const char *path, uid_t *uid) {
+         _cleanup_free_ char *slice = NULL;
+         char *number, *suffix;
+         int r;
+
+         assert(path);
+
+         r = cg_path_get_slice(path, &slice);
+         if (r < 0)
+                 return r;
+
+         number = startswith(slice, "user-");
+         if (!number)
+                 return -ENXIO;
+
+         suffix = endswith(number, ".slice");
+         if (!suffix)
+                 return -ENXIO;
+
+         /* Cut off the suffix in place, so that 'number' is just the UID */
+         *suffix = 0;
+
+         return parse_uid(number, uid) < 0 ? -ENXIO : 0;
+ }
+
+ /* Determines the owning user of process 'pid', based on its shifted cgroup path. See
+  * cg_path_get_owner_uid() for the result. */
+ int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
+         _cleanup_free_ char *shifted = NULL;
+         int q;
+
+         q = cg_pid_get_path_shifted(pid, NULL, &shifted);
+
+         return q < 0 ? q : cg_path_get_owner_uid(shifted, uid);
+ }
+
+ /* Finds the right-most slice unit from the beginning of the path, stopping before the first
+  * component that is not a slice. If the path does not start with a slice at all, the root slice
+  * is returned. Stores a newly allocated string in *slice and returns 0, or a negative error
+  * code. */
+ int cg_path_get_slice(const char *p, char **slice) {
+         const char *last_slice = NULL;
+         char *root;
+
+         assert(p);
+         assert(slice);
+
+         /* Walk forward as long as the components are slices, remembering the most recent one */
+         for (;;) {
+                 size_t length;
+
+                 p += strspn(p, "/");
+
+                 length = strcspn(p, "/");
+                 if (!valid_slice_name(p, length))
+                         break;
+
+                 last_slice = p;
+                 p += length;
+         }
+
+         if (last_slice)
+                 return cg_path_decode_unit(last_slice, slice);
+
+         root = strdup(SPECIAL_ROOT_SLICE);
+         if (!root)
+                 return -ENOMEM;
+
+         *slice = root;
+         return 0;
+ }
+
+ /* Determines the slice that process 'pid' belongs to, based on its shifted cgroup path. See
+  * cg_path_get_slice() for the result. */
+ int cg_pid_get_slice(pid_t pid, char **slice) {
+         _cleanup_free_ char *shifted = NULL;
+         int q;
+
+         assert(slice);
+
+         q = cg_pid_get_path_shifted(pid, NULL, &shifted);
+         if (q >= 0)
+                 q = cg_path_get_slice(shifted, slice);
+
+         return q;
+ }
+
+ /* Extracts the user slice a cgroup path belongs to. Returns -ENXIO if the path does not lead
+  * through a user manager or session scope; what follows is parsed exactly like a system slice
+  * path, see cg_path_get_slice(). */
+ int cg_path_get_user_slice(const char *p, char **slice) {
+         const char *rest;
+
+         assert(p);
+         assert(slice);
+
+         rest = skip_user_prefix(p);
+
+         return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
+ }
+
+ /* Determines the user slice that process 'pid' belongs to, based on its shifted cgroup path. See
+  * cg_path_get_user_slice() for the result. */
+ int cg_pid_get_user_slice(pid_t pid, char **slice) {
+         _cleanup_free_ char *shifted = NULL;
+         int q;
+
+         assert(slice);
+
+         q = cg_pid_get_path_shifted(pid, NULL, &shifted);
+
+         return q < 0 ? q : cg_path_get_user_slice(shifted, slice);
+ }
+
+ /* Implements very minimal escaping for names that are to be used as file names in the cgroup
+  * tree: a '_' is prepended to any name that
+  *
+  *   - is empty, or starts with '_' or '.',
+  *   - is "notify_on_release", "release_agent" or "tasks",
+  *   - starts with "cgroup.", or
+  *   - consists of a known controller name followed by a dot and no further dot.
+  *
+  * That way, when reading cgroup names it is sufficient to remove a single prefixing underscore
+  * if there is one.
+  *
+  * The return value of this function (unlike cg_unescape()) needs free()! NULL is returned on
+  * allocation failure. */
+ char *cg_escape(const char *p) {
+         CGroupController c;
+         const char *dot;
+         size_t prefix_length;
+
+         if (IN_SET(p[0], 0, '_', '.') ||
+             STR_IN_SET(p, "notify_on_release", "release_agent", "tasks") ||
+             startswith(p, "cgroup."))
+                 return strappend("_", p);
+
+         dot = strrchr(p, '.');
+         if (!dot)
+                 return strdup(p);
+
+         /* Compare what precedes the last dot against each controller name */
+         prefix_length = dot - p;
+
+         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+                 const char *name;
+
+                 name = cgroup_controller_to_string(c);
+
+                 if (prefix_length == strlen(name) && memcmp(p, name, prefix_length) == 0)
+                         return strappend("_", p);
+         }
+
+         return strdup(p);
+ }
+
+ /* Undoes cg_escape(): skips a single leading '_' if there is one. The result points into the
+  * argument, hence (unlike the result of cg_escape()) it doesn't need free()! */
+ char *cg_unescape(const char *p) {
+         assert(p);
+
+         return (char*) (p[0] == '_' ? p + 1 : p);
+ }
+
+#define CONTROLLER_VALID                        \
+         DIGITS LETTERS                          \
+         "_"
+
+ /* Checks whether 'p' is an acceptable controller name: either the systemd controller itself, or
+  * a non-empty string (optionally prefixed with "name=") that does not start with '_', consists
+  * only of characters from CONTROLLER_VALID and is at most FILENAME_MAX characters long. */
+ bool cg_controller_is_valid(const char *p) {
+         const char *named;
+         size_t length;
+
+         if (!p)
+                 return false;
+
+         if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
+                 return true;
+
+         /* For named hierarchies only what follows the prefix is validated */
+         named = startswith(p, "name=");
+         if (named)
+                 p = named;
+
+         if (IN_SET(*p, 0, '_'))
+                 return false;
+
+         /* Length of the leading run of valid characters; it has to cover the whole string */
+         length = strspn(p, CONTROLLER_VALID);
+         if (p[length] != 0)
+                 return false;
+
+         if (length > FILENAME_MAX)
+                 return false;
+
+         return true;
+ }
+
+ int cg_slice_to_path(const char *unit, char **ret) {
+ _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
+ const char *dash;
+ int r;
+ /* Converts a slice unit name into the cgroup path of that slice: for every dash in the name's
+  * prefix, the part before that dash plus ".slice" becomes one (escaped) parent directory, and
+  * the (escaped) unit name itself becomes the last component. E.g. "a-b.slice" should become
+  * "a.slice/a-b.slice" (assuming unit_name_to_prefix() yields "a-b" here — TODO confirm). The
+  * root slice maps to the empty string.
+  *
+  * On success returns 0 and stores a newly allocated string in *ret. Returns -EINVAL if 'unit'
+  * is not a valid plain ".slice" unit name or has leading, trailing or double dashes, -ENOMEM on
+  * allocation failure, or the error of unit_name_to_prefix(). */
+ assert(unit);
+ assert(ret);
+
+ if (streq(unit, SPECIAL_ROOT_SLICE)) {
+ char *x;
+
+ x = strdup("");
+ if (!x)
+ return -ENOMEM;
+ *ret = x;
+ return 0;
+ }
+
+ if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ if (!endswith(unit, ".slice"))
+ return -EINVAL;
+
+ r = unit_name_to_prefix(unit, &p);
+ if (r < 0)
+ return r;
+
+ dash = strchr(p, '-');
+
+ /* Don't allow initial dashes */
+ if (dash == p)
+ return -EINVAL;
+
+ while (dash) { /* one iteration per dash, i.e. per parent slice */
+ _cleanup_free_ char *escaped = NULL;
+ char n[dash - p + sizeof(".slice")]; /* prefix up to the dash + ".slice" + NUL */
+
+#if HAS_FEATURE_MEMORY_SANITIZER
+ /* msan doesn't instrument stpncpy, so it thinks
+ * n is later used uninitialized:
+ * https://github.com/google/sanitizers/issues/926
+ */
+ zero(n);
+#endif
+
+ /* Don't allow trailing or double dashes */
+ if (IN_SET(dash[1], 0, '-'))
+ return -EINVAL;
+
+ strcpy(stpncpy(n, p, dash - p), ".slice"); /* n = everything before this dash + ".slice" */
+ if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ escaped = cg_escape(n);
+ if (!escaped)
+ return -ENOMEM;
+
+ if (!strextend(&s, escaped, "/", NULL))
+ return -ENOMEM;
+
+ dash = strchr(dash+1, '-');
+ }
+ /* Finally append the full unit name as the last path component */
+ e = cg_escape(unit);
+ if (!e)
+ return -ENOMEM;
+
+ if (!strextend(&s, e, NULL))
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+ }
+
+ /* Writes 'value' to the attribute file 'attribute' of the specified cgroup. Returns the result
+  * of write_string_file(), or a negative error code if the path could not be determined. */
+ int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
+         _cleanup_free_ char *file = NULL;
+         int q;
+
+         q = cg_get_path(controller, path, attribute, &file);
+
+         return q < 0 ? q : write_string_file(file, value, WRITE_STRING_FILE_DISABLE_BUFFER);
+ }
+
+ /* Reads the first line of the attribute file 'attribute' of the specified cgroup into *ret.
+  * Returns the result of read_one_line_file(), or a negative error code if the path could not be
+  * determined. */
+ int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
+         _cleanup_free_ char *file = NULL;
+         int q;
+
+         q = cg_get_path(controller, path, attribute, &file);
+
+         return q < 0 ? q : read_one_line_file(file, ret);
+ }
+
+ int cg_get_keyed_attribute(
+ const char *controller,
+ const char *path,
+ const char *attribute,
+ char **keys,
+ char **ret_values) {
+
+ _cleanup_free_ char *filename = NULL, *contents = NULL;
+ const char *p;
+ size_t n, i, n_done = 0;
+ char **v;
+ int r;
+
+ /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
+ * all keys to retrieve. The 'ret_values' parameter should be passed as an array of string pointers with the
+ * same number of entries as 'keys'. On success each entry will be set to a newly allocated copy of the value
+ * of the matching key, and 0 is returned.
+ *
+ * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. On
+ * failure 'ret_values' is left untouched. */
+
+ r = cg_get_path(controller, path, attribute, &filename);
+ if (r < 0)
+ return r;
+
+ r = read_full_file(filename, &contents, NULL);
+ if (r < 0)
+ return r;
+
+ n = strv_length(keys);
+ if (n == 0) /* No keys to retrieve? That's easy, we are done then */
+ return 0;
+
+ /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
+ v = newa0(char*, n);
+ /* Go through the file line by line; 'p' always points to the beginning of a line */
+ for (p = contents; *p;) {
+ const char *w = NULL;
+
+ for (i = 0; i < n; i++)
+ if (!v[i]) { /* only look for keys that have no value yet */
+ w = first_word(p, keys[i]);
+ if (w)
+ break;
+ }
+
+ if (w) { /* line starts with keys[i], and 'w' points to what follows the key */
+ size_t l;
+
+ l = strcspn(w, NEWLINE);
+ v[i] = strndup(w, l);
+ if (!v[i]) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ n_done++;
+ if (n_done >= n) /* all keys found, no need to read on */
+ goto done;
+
+ p = w + l;
+ } else
+ p += strcspn(p, NEWLINE); /* not a line of interest, skip it */
+
+ p += strspn(p, NEWLINE);
+ }
+ /* End of file reached without having found every key */
+ r = -ENXIO;
+
+ fail:
+ for (i = 0; i < n; i++)
+ free(v[i]);
+
+ return r;
+
+ done:
+ memcpy(ret_values, v, sizeof(char*) * n); /* ownership of the strings passes to the caller */
+ return 0;
+
+ }
+
+ /* Creates a cgroup in our private (systemd) tree, and additionally duplicates it in the legacy
+  * hierarchies of all supported controllers listed in 'mask', while trimming it from the
+  * hierarchies of all other supported controllers. Failures in those other hierarchies are
+  * ignored. On a fully unified setup only the first step is done.
+  *
+  * Returns 0 if the group already existed in the systemd hierarchy, 1 on success, negative
+  * otherwise.
+  */
+ int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
+         CGroupController c;
+         CGroupMask handled;
+         bool created;
+         int r;
+
+         /* First create the cgroup in our own hierarchy. */
+         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
+         if (r < 0)
+                 return r;
+         created = r;
+
+         /* If we are in the unified hierarchy, we are done now */
+         r = cg_all_unified();
+         if (r < 0)
+                 return r;
+         if (r > 0)
+                 return created;
+
+         supported &= CGROUP_MASK_V1;
+         mask = CGROUP_MASK_EXTEND_JOINED(mask);
+         handled = 0;
+
+         /* Otherwise, do the same in the other hierarchies */
+         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+                 const char *name;
+
+                 /* Skip what is unsupported, or was covered already via a joined controller */
+                 if (!FLAGS_SET(supported, bit) || FLAGS_SET(handled, bit))
+                         continue;
+
+                 name = cgroup_controller_to_string(c);
+                 if (!FLAGS_SET(mask, bit))
+                         (void) cg_trim(name, path, true);
+                 else
+                         (void) cg_create(name, path);
+
+                 handled |= CGROUP_MASK_EXTEND_JOINED(bit);
+         }
+
+         return created;
+ }
+
+ /* Attaches 'pid' to the cgroup 'path' in the systemd hierarchy and, unless everything is
+  * unified, also in the legacy hierarchy of every supported controller. For those,
+  * 'path_callback' (if set) may supply a different path per controller; when it returns NULL,
+  * 'path' is used. Only a failure in the systemd hierarchy is reported; the other attachments
+  * are best effort (with fallback to prefixes). Returns 0 on success, negative otherwise. */
+ int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
+         CGroupController c;
+         CGroupMask handled;
+         int r;
+
+         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
+         if (r < 0)
+                 return r;
+
+         r = cg_all_unified();
+         if (r < 0)
+                 return r;
+         if (r > 0)
+                 return 0;
+
+         supported &= CGROUP_MASK_V1;
+         handled = 0;
+
+         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+                 const char *target = NULL;
+
+                 /* Skip what is unsupported, or was covered already via a joined controller */
+                 if (!FLAGS_SET(supported, bit) || FLAGS_SET(handled, bit))
+                         continue;
+
+                 if (path_callback)
+                         target = path_callback(bit, userdata);
+                 if (!target)
+                         target = path;
+
+                 (void) cg_attach_fallback(cgroup_controller_to_string(c), target, pid);
+                 handled |= CGROUP_MASK_EXTEND_JOINED(bit);
+         }
+
+         return 0;
+ }
+
+int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
+        int first_error = 0;
+        Iterator it;
+        void *entry;
+
+        /* Runs cg_attach_everywhere() for each PID stored in 'pids'. Every PID is attempted even if an
+         * earlier one failed; the first negative result seen is what gets returned, otherwise 0. */
+
+        SET_FOREACH(entry, pids, it) {
+                int k;
+
+                k = cg_attach_everywhere(supported, path, PTR_TO_PID(entry), path_callback, userdata);
+                if (k < 0 && first_error >= 0)
+                        first_error = k;
+        }
+
+        return first_error;
+}
+
+int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
+        CGroupMask handled = 0;
+        CGroupController ctrl;
+        int ret = 0, unified;
+
+        /* Migrates processes from 'from' to 'to' within the systemd hierarchy (skipped when both paths are
+         * equal), then, on non-unified setups, migrates from 'to' in the systemd hierarchy into each
+         * supported v1 hierarchy. The per-controller destination may be overridden through 'to_callback'
+         * (NULL callback or NULL result means 'to'). Per-controller results are ignored.
+         *
+         * Returns the result of the migration in the systemd hierarchy (0 if it was skipped), or a negative
+         * value on error. */
+
+        if (!path_equal(from, to)) {
+                ret = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
+                if (ret < 0)
+                        return ret;
+        }
+
+        unified = cg_all_unified();
+        if (unified < 0)
+                return unified;
+        if (unified > 0)
+                return ret;
+
+        supported &= CGROUP_MASK_V1;
+
+        for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) {
+                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(ctrl);
+                const char *target;
+
+                if (!FLAGS_SET(supported, bit) || FLAGS_SET(handled, bit))
+                        continue;
+
+                target = to_callback ? to_callback(bit, userdata) : NULL;
+                if (!target)
+                        target = to;
+
+                (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(ctrl), target, 0);
+                handled |= CGROUP_MASK_EXTEND_JOINED(bit);
+        }
+
+        return ret;
+}
+
+int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
+        CGroupMask handled = 0;
+        CGroupController ctrl;
+        int ret, unified;
+
+        /* Trims 'path' in the systemd hierarchy and, on non-unified setups, in every supported v1
+         * hierarchy too. Only the result for the systemd hierarchy is reported; the others are ignored.
+         * Returns that result, or a negative value on error. */
+
+        ret = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
+        if (ret < 0)
+                return ret;
+
+        unified = cg_all_unified();
+        if (unified < 0)
+                return unified;
+        if (unified > 0)
+                return ret;
+
+        supported &= CGROUP_MASK_V1;
+
+        for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) {
+                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(ctrl);
+
+                if (!FLAGS_SET(supported, bit) || FLAGS_SET(handled, bit))
+                        continue;
+
+                (void) cg_trim(cgroup_controller_to_string(ctrl), path, delete_root);
+                handled |= CGROUP_MASK_EXTEND_JOINED(bit);
+        }
+
+        return ret;
+}
+
+int cg_mask_to_string(CGroupMask mask, char **ret) {
+        _cleanup_free_ char *buf = NULL;
+        size_t len = 0, allocated = 0;
+        CGroupController ctrl;
+        bool first = true;
+
+        /* Formats the controllers set in 'mask' as a space-separated list of controller names. An empty
+         * mask yields *ret == NULL. Returns 0 on success, -ENOMEM if the buffer cannot be grown. */
+
+        assert(ret);
+
+        if (mask == 0) {
+                *ret = NULL;
+                return 0;
+        }
+
+        for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) {
+                const char *name;
+                size_t name_len, sep;
+
+                if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(ctrl)))
+                        continue;
+
+                name = cgroup_controller_to_string(ctrl);
+                name_len = strlen(name);
+                sep = first ? 0 : 1;
+
+                /* Room for the optional separator, the name and the trailing NUL */
+                if (!GREEDY_REALLOC(buf, allocated, len + sep + name_len + 1))
+                        return -ENOMEM;
+
+                if (sep > 0)
+                        buf[len++] = ' ';
+                memcpy(buf + len, name, name_len);
+                len += name_len;
+
+                first = false;
+        }
+
+        assert(buf);
+
+        buf[len] = 0;
+        *ret = TAKE_PTR(buf);
+
+        return 0;
+}
+
+int cg_mask_from_string(const char *value, CGroupMask *ret) {
+        CGroupMask result = 0;
+
+        /* Parses a list of controller names (split with extract_first_word()) into a mask. Words that
+         * are not known controller names are silently skipped. Returns 0 on success and stores the mask
+         * in *ret; on a word extraction error the negative result is returned and *ret is left untouched. */
+
+        assert(ret);
+        assert(value);
+
+        for (;;) {
+                _cleanup_free_ char *word = NULL;
+                CGroupController ctrl;
+                int r;
+
+                r = extract_first_word(&value, &word, NULL, 0);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                ctrl = cgroup_controller_from_string(word);
+                if (ctrl >= 0)
+                        result |= CGROUP_CONTROLLER_TO_MASK(ctrl);
+        }
+
+        *ret = result;
+        return 0;
+}
+
+int cg_mask_supported(CGroupMask *ret) {
+        CGroupMask mask = 0;
+        int r;
+
+        /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense
+         * of and that are actually accessible. Only covers real controllers, i.e. not the
+         * CGROUP_CONTROLLER_BPF_xyz pseudo-controllers. Returns 0 on success, negative on error. */
+
+        r = cg_all_unified();
+        if (r < 0)
+                return r;
+
+        if (r == 0) {
+                CGroupController ctrl;
+
+                /* Legacy hierarchy: probe each v1 controller for accessibility */
+                for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) {
+                        CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(ctrl);
+
+                        if (!FLAGS_SET(CGROUP_MASK_V1, bit))
+                                continue;
+
+                        if (controller_is_accessible(cgroup_controller_to_string(ctrl)) >= 0)
+                                mask |= bit;
+                }
+        } else {
+                _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
+
+                /* Unified hierarchy: the supported and accessible controllers are listed in the
+                 * "cgroup.controllers" attribute of the top-level cgroup */
+
+                r = cg_get_root_path(&root);
+                if (r < 0)
+                        return r;
+
+                r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
+                if (r < 0)
+                        return r;
+
+                r = read_one_line_file(path, &controllers);
+                if (r < 0)
+                        return r;
+
+                r = cg_mask_from_string(controllers, &mask);
+                if (r < 0)
+                        return r;
+
+                /* Only keep the controllers covered by CGROUP_MASK_V2 (cpu, io, memory, pids), mask
+                 * everything else off */
+                mask &= CGROUP_MASK_V2;
+        }
+
+        *ret = mask;
+        return 0;
+}
+
+int cg_kernel_controllers(Set **ret) {
+        _cleanup_set_free_free_ Set *controllers = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        int r;
+
+        assert(ret);
+
+        /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
+         * and controllers that aren't currently accessible (because not mounted). This does not include "name="
+         * pseudo-controllers.
+         *
+         * Returns 0 on success and stores a newly allocated set of controller name strings in *ret (NULL if
+         * /proc/cgroups does not exist). Returns a negative errno-style value on failure, -EBADMSG if the
+         * file cannot be parsed. */
+
+        controllers = set_new(&string_hash_ops);
+        if (!controllers)
+                return -ENOMEM;
+
+        f = fopen("/proc/cgroups", "re");
+        if (!f) {
+                if (errno == ENOENT) {
+                        *ret = NULL;
+                        return 0;
+                }
+
+                return -errno;
+        }
+
+        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+        /* Ignore the header line */
+        (void) read_line(f, (size_t) -1, NULL);
+
+        for (;;) {
+                /* Owned by us until handed to the set. Initialized and auto-freed so that a partial
+                 * fscanf() match (name allocated by %ms, but no "enabled" field) neither leaks the string
+                 * nor leaves the pointer indeterminate. */
+                _cleanup_free_ char *controller = NULL;
+                int enabled = 0;
+
+                errno = 0;
+                if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
+
+                        if (feof(f))
+                                break;
+
+                        if (ferror(f) && errno > 0)
+                                return -errno;
+
+                        return -EBADMSG;
+                }
+
+                if (!enabled)
+                        continue;
+
+                if (!cg_controller_is_valid(controller))
+                        return -EBADMSG;
+
+                /* Ownership of the string passes to set_consume() here, regardless of its result */
+                r = set_consume(controllers, TAKE_PTR(controller));
+                if (r < 0)
+                        return r;
+        }
+
+        *ret = TAKE_PTR(controllers);
+
+        return 0;
+}
+
+/* Per-thread cache of the detected cgroup layout. Stays CGROUP_UNIFIED_UNKNOWN until cg_unified_update() has
+ * filled it in; cg_unified_flush() resets it. */
+static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
+
+/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on /sys/fs/cgroup/systemd. This
+ * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
+ * /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts v2 on /sys/fs/cgroup/unified and maintains
+ * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
+ *
+ * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep cgroup v2
+ * process management but disable the compat dual layout, we return %true on
+ * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
+ */
+static thread_local bool unified_systemd_v232;
+
+static int cg_unified_update(void) {
+
+        struct statfs fs;
+
+        /* Detects which cgroup layout is mounted below /sys/fs/cgroup/ and records it in unified_cache
+         * (plus unified_systemd_v232 for the hybrid case). Does nothing if a result is already cached.
+         *
+         * Returns 0 on success. Returns an error when the cgroup hierarchies aren't mounted yet or we
+         * have any other trouble determining if the unified hierarchy is supported. */
+
+        if (unified_cache >= CGROUP_UNIFIED_NONE)
+                return 0;
+
+        if (statfs("/sys/fs/cgroup/", &fs) < 0)
+                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
+
+        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+                log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
+                unified_cache = CGROUP_UNIFIED_ALL;
+        } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
+                /* A tmpfs at the top means per-controller mounts below it: hybrid or legacy */
+                if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
+                    F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+                        log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
+                        unified_cache = CGROUP_UNIFIED_SYSTEMD;
+                        unified_systemd_v232 = false;
+                } else {
+                        if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
+                                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\") failed: %m");
+
+                        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+                                log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
+                                unified_cache = CGROUP_UNIFIED_SYSTEMD;
+                                unified_systemd_v232 = true;
+                        } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
+                                log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
+                                unified_cache = CGROUP_UNIFIED_NONE;
+                        } else {
+                                /* Anything else is logged and then treated like the legacy layout */
+                                log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
+                                          (unsigned long long) fs.f_type);
+                                unified_cache = CGROUP_UNIFIED_NONE;
+                        }
+                }
+        } else
+                return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
+                                       "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
+                                       (unsigned long long)fs.f_type);
+
+        return 0;
+}
+
+int cg_unified_controller(const char *controller) {
+        int r;
+
+        /* Tells whether 'controller' is managed on the unified hierarchy: always in the fully unified
+         * layout, never in the legacy layout, and otherwise only for the systemd controller. Returns
+         * true/false, or a negative value if detection fails. */
+
+        r = cg_unified_update();
+        if (r < 0)
+                return r;
+
+        if (unified_cache >= CGROUP_UNIFIED_ALL)
+                return true;
+
+        if (unified_cache == CGROUP_UNIFIED_NONE)
+                return false;
+
+        return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
+}
+
+int cg_all_unified(void) {
+        /* > 0 if everything is on the unified hierarchy, 0 if not, negative if detection fails */
+        int r = cg_unified_update();
+
+        return r < 0 ? r : unified_cache >= CGROUP_UNIFIED_ALL;
+}
+
+int cg_hybrid_unified(void) {
+        /* > 0 only for the hybrid layout in its non-v232 variant, 0 otherwise, negative if detection
+         * fails */
+        int r = cg_unified_update();
+
+        if (r < 0)
+                return r;
+
+        if (unified_cache != CGROUP_UNIFIED_SYSTEMD)
+                return false;
+
+        return !unified_systemd_v232;
+}
+
+int cg_unified_flush(void) {
+        /* Forget the cached layout, then detect it afresh; returns the result of the detection */
+        unified_cache = CGROUP_UNIFIED_UNKNOWN;
+        return cg_unified_update();
+}
+
+int cg_enable_everywhere(
+                CGroupMask supported,
+                CGroupMask mask,
+                const char *p,
+                CGroupMask *ret_result_mask) {
+
+        _cleanup_fclose_ FILE *f = NULL;
+        _cleanup_free_ char *fs = NULL;
+        CGroupMask enabled_mask = 0;
+        CGroupController ctrl;
+        int r;
+
+        /* Writes "+controller" / "-controller" into the cgroup.subtree_control file of cgroup 'p' for each
+         * supported v2 controller, depending on whether its bit is set in 'mask'. If 'ret_result_mask' is
+         * non-NULL it receives the set of controllers considered enabled afterwards.
+         *
+         * Returns 0 on success (failures for individual controllers are only logged), negative if the
+         * layout cannot be determined or the attribute file cannot be located or opened. */
+
+        assert(p);
+
+        if (supported == 0) {
+                if (ret_result_mask)
+                        *ret_result_mask = 0;
+                return 0;
+        }
+
+        r = cg_all_unified();
+        if (r < 0)
+                return r;
+        if (r == 0) {
+                /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups. Claim
+                 * complete success right away: callers tend to compare the returned mask against what they
+                 * asked for and requeue realization on mismatch, hence always saying we fully succeeded
+                 * avoids pointless re-realization.
+                 *
+                 * The result is masked with CGROUP_MASK_V2 because 'supported' might contain pure-V1 or BPF
+                 * controllers, and we never want to claim that we could enable those with
+                 * cgroup.subtree_control. */
+                if (ret_result_mask)
+                        *ret_result_mask = mask & supported & CGROUP_MASK_V2;
+                return 0;
+        }
+
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
+        if (r < 0)
+                return r;
+
+        for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) {
+                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(ctrl);
+                const char *name;
+                bool enable;
+
+                if (!FLAGS_SET(CGROUP_MASK_V2, bit) || !FLAGS_SET(supported, bit))
+                        continue;
+
+                name = cgroup_controller_to_string(ctrl);
+                enable = FLAGS_SET(mask, bit);
+
+                {
+                        /* One sign character, the controller name, and the trailing NUL */
+                        char cmd[1 + strlen(name) + 1];
+
+                        cmd[0] = enable ? '+' : '-';
+                        strcpy(cmd + 1, name);
+
+                        /* The file is opened lazily, on the first controller we actually have to write */
+                        if (!f) {
+                                f = fopen(fs, "we");
+                                if (!f)
+                                        return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
+                        }
+
+                        r = write_string_stream(f, cmd, WRITE_STRING_FILE_DISABLE_BUFFER);
+                        if (r >= 0) {
+                                /* The write went through: a controller we turned on is now enabled */
+                                if (enable)
+                                        enabled_mask |= bit;
+                                continue;
+                        }
+
+                        log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
+                                        enable ? "enable" : "disable", name, p, fs);
+                        clearerr(f);
+
+                        /* If we can't turn off a controller, leave it on in the reported resulting mask.
+                         * This happens for example when we attempt to turn off a controller up in the tree
+                         * that is used down in the tree.
+                         *
+                         * Only EBUSY is handled like this: it indicates that the controller is currently
+                         * enabled and cannot be disabled because something down the hierarchy is still
+                         * using it, hence it is safe to assume it is still on. Any other error (EINVAL,
+                         * EOPNOTSUPP, ...) most likely means something like "never heard of this
+                         * controller", in which case it is safer to assume it is certainly not enabled. */
+                        if (!enable && r == -EBUSY)
+                                enabled_mask |= bit;
+                }
+        }
+
+        /* Let's return the precise set of controllers now enabled for the cgroup. */
+        if (ret_result_mask)
+                *ret_result_mask = enabled_mask;
+
+        return 0;
+}
+
+bool cg_is_unified_wanted(void) {
+        static thread_local int wanted = -1;
+        _cleanup_free_ char *no_v1 = NULL;
+        bool flag;
+        int r;
+
+        /* Decides whether the fully unified hierarchy shall be used. The answer is computed once per
+         * thread and cached afterwards. */
+
+        if (wanted >= 0)
+                return wanted;
+
+        /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
+        if (cg_unified_flush() >= 0) {
+                wanted = unified_cache >= CGROUP_UNIFIED_ALL;
+                return wanted;
+        }
+
+        /* An explicit systemd.unified_cgroup_hierarchy= on the kernel command line is respected. */
+        r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &flag);
+        if (r > 0) {
+                wanted = flag;
+                return wanted;
+        }
+
+        /* With cgroup_no_v1=all and no other instructions it seems highly unlikely that the hybrid or
+         * legacy hierarchy is desired. */
+        r = proc_cmdline_get_key("cgroup_no_v1", 0, &no_v1);
+        if (r > 0 && streq_ptr(no_v1, "all")) {
+                wanted = true;
+                return wanted;
+        }
+
+        /* Otherwise go by the compile-time default */
+        wanted = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
+        return wanted;
+}
+
+bool cg_is_legacy_wanted(void) {
+        static thread_local int wanted = -1;
+
+        /* Decides whether (at least partial) legacy hierarchy support is wanted; cached per thread. */
+
+        if (wanted < 0) {
+                /* Legacy is not wanted only if cgroup v2 is already mounted as the full unified
+                 * hierarchy. Otherwise, assume that at least partial legacy is wanted, since cgroup v2
+                 * should already be mounted at this point. */
+                if (cg_unified_flush() >= 0 &&
+                    unified_cache == CGROUP_UNIFIED_ALL)
+                        wanted = false;
+                else
+                        wanted = true;
+        }
+
+        return wanted;
+}
+
+bool cg_is_hybrid_wanted(void) {
+        static thread_local int wanted = -1;
+        bool legacy_requested;
+        int r;
+
+        /* Decides whether the hybrid hierarchy shall be used; cached per thread. */
+
+        if (wanted >= 0)
+                return wanted;
+
+        /* If the full unified hierarchy is already mounted, then hybrid is not what was chosen. */
+        if (cg_unified_flush() >= 0 &&
+            unified_cache == CGROUP_UNIFIED_ALL) {
+                wanted = false;
+                return wanted;
+        }
+
+        /* Otherwise, let's see what the kernel command line has to say. */
+        r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &legacy_requested);
+        if (r > 0)
+                /* The meaning of the kernel option is reversed wrt. the return value of this function,
+                 * hence the negation. */
+                wanted = !legacy_requested;
+        else
+                /* We default to true if the default is "hybrid", obviously, but also when the default is
+                 * "unified", because if we get called, it means that unified hierarchy was not mounted. */
+                wanted = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
+
+        return wanted;
+}
+
+int cg_weight_parse(const char *s, uint64_t *ret) {
+        uint64_t weight;
+        int r;
+
+        /* Parses a weight value. An empty string maps to CGROUP_WEIGHT_INVALID; anything else must parse
+         * as an unsigned integer within CGROUP_WEIGHT_MIN..CGROUP_WEIGHT_MAX, otherwise -ERANGE (or the
+         * parser's error) is returned and *ret is left untouched. Returns 0 on success. */
+
+        if (!isempty(s)) {
+                r = safe_atou64(s, &weight);
+                if (r < 0)
+                        return r;
+
+                if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
+                        return -ERANGE;
+        } else
+                weight = CGROUP_WEIGHT_INVALID;
+
+        *ret = weight;
+        return 0;
+}
+
+/* Default value for each IO limit type, indexed by CGroupIOLimitType: every limit defaults to CGROUP_LIMIT_MAX. */
+const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
+ [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
+ [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
+ [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
+ [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
+};
+
+/* String name of each IO limit type, indexed by CGroupIOLimitType. */
+static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
+ [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
+ [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
+ [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
+ [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
+};
+
+/* Presumably generates cgroup_io_limit_type_to_string()/_from_string() from the table above (both are declared
+ * in cgroup-util.h) - see the macro definition to confirm. */
+DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
+
+int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
+        uint64_t shares;
+        int r;
+
+        /* Parses a CPU shares value. An empty string maps to CGROUP_CPU_SHARES_INVALID; anything else
+         * must parse as an unsigned integer within CGROUP_CPU_SHARES_MIN..CGROUP_CPU_SHARES_MAX, otherwise
+         * -ERANGE (or the parser's error) is returned and *ret is left untouched. Returns 0 on success. */
+
+        if (!isempty(s)) {
+                r = safe_atou64(s, &shares);
+                if (r < 0)
+                        return r;
+
+                if (shares < CGROUP_CPU_SHARES_MIN || shares > CGROUP_CPU_SHARES_MAX)
+                        return -ERANGE;
+        } else
+                shares = CGROUP_CPU_SHARES_INVALID;
+
+        *ret = shares;
+        return 0;
+}
+
+int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
+        uint64_t weight;
+        int r;
+
+        /* Parses a blkio weight value. An empty string maps to CGROUP_BLKIO_WEIGHT_INVALID; anything else
+         * must parse as an unsigned integer within CGROUP_BLKIO_WEIGHT_MIN..CGROUP_BLKIO_WEIGHT_MAX,
+         * otherwise -ERANGE (or the parser's error) is returned and *ret is left untouched. Returns 0 on
+         * success. */
+
+        if (!isempty(s)) {
+                r = safe_atou64(s, &weight);
+                if (r < 0)
+                        return r;
+
+                if (weight < CGROUP_BLKIO_WEIGHT_MIN || weight > CGROUP_BLKIO_WEIGHT_MAX)
+                        return -ERANGE;
+        } else
+                weight = CGROUP_BLKIO_WEIGHT_INVALID;
+
+        *ret = weight;
+        return 0;
+}
+
+bool is_cgroup_fs(const struct statfs *s) {
+        /* True if the statfs data carries either the cgroup (v1) or the cgroup2 superblock magic */
+        if (is_fs_type(s, CGROUP_SUPER_MAGIC))
+                return true;
+
+        return is_fs_type(s, CGROUP2_SUPER_MAGIC);
+}
+
+bool fd_is_cgroup_fs(int fd) {
+        struct statfs s;
+
+        /* Returns true if the file system 'fd' lives on is a cgroup file system according to
+         * is_cgroup_fs(). The return type is bool, hence an fstatfs() failure cannot be propagated as
+         * -errno: any non-zero value would be converted to "true". Report such failures as "not a cgroup
+         * file system" instead. */
+        if (fstatfs(fd, &s) < 0)
+                return false;
+
+        return is_cgroup_fs(&s);
+}
+
+/* String name of each well-known controller, indexed by CGroupController. Both the pointers and the strings
+ * are constant, matching cgroup_io_limit_type_table. */
+static const char* const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
+        [CGROUP_CONTROLLER_CPU] = "cpu",
+        [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
+        [CGROUP_CONTROLLER_IO] = "io",
+        [CGROUP_CONTROLLER_BLKIO] = "blkio",
+        [CGROUP_CONTROLLER_MEMORY] = "memory",
+        [CGROUP_CONTROLLER_DEVICES] = "devices",
+        [CGROUP_CONTROLLER_PIDS] = "pids",
+        [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
+        [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
+};
+
+/* Presumably generates cgroup_controller_to_string()/_from_string() from the table above (both are declared in
+ * cgroup-util.h) - see the macro definition to confirm. */
+DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
+
+CGroupMask get_cpu_accounting_mask(void) {
+        static CGroupMask needed_mask = (CGroupMask) -1;
+        struct utsname u;
+
+        /* Returns the controller mask that has to be enabled in order to get CPU accounting. The result is
+         * computed on first use and cached in a static variable.
+         *
+         * On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is provided externally from the
+         * CPU controller, which means we don't need to enable the CPU controller just to get metrics. This
+         * is good, because enabling the CPU controller comes at a minor performance hit, especially when
+         * it's propagated deep into large hierarchies. There's also no separate CPU accounting controller
+         * available within a unified hierarchy.
+         *
+         * The desired mask hence varies as follows:
+         *
+         *                 ╔═════════════════════╤═════════════════════╗
+         *                 ║     Linux ≥4.15     │     Linux <4.15     ║
+         * ╔═══════════════╬═════════════════════╪═════════════════════╣
+         * ║ Unified       ║ nothing             │ CGROUP_MASK_CPU     ║
+         * ╟───────────────╫─────────────────────┼─────────────────────╢
+         * ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
+         * ╚═══════════════╩═════════════════════╧═════════════════════╝
+         *
+         * We check the kernel version instead of manually checking whether cpu.stat is present for every
+         * cgroup, as that check in itself would already be fairly expensive. Kernels where this patch has
+         * been backported will therefore have the CPU controller enabled unnecessarily, which is more
+         * expensive than necessary, but harmless. */
+
+        if (needed_mask != (CGroupMask) -1)
+                return needed_mask;
+
+        /* NOTE(review): any non-zero result, including a negative error from cg_all_unified(), is treated
+         * like "unified" here - confirm that this is intended. */
+        if (cg_all_unified() == 0) {
+                needed_mask = CGROUP_MASK_CPUACCT;
+                return needed_mask;
+        }
+
+        assert_se(uname(&u) >= 0);
+
+        needed_mask = str_verscmp(u.release, "4.15") < 0 ? CGROUP_MASK_CPU : 0;
+        return needed_mask;
+}
+
+bool cpu_accounting_is_cheap(void) {
+        /* CPU accounting is "cheap" when no controller has to be enabled to obtain it */
+        CGroupMask needed = get_cpu_accounting_mask();
+
+        return needed == 0;
+}
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
new file mode 100644
index 0000000..119b493
--- /dev/null
+++ b/src/basic/cgroup-util.h
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+
+#include "def.h"
+#include "set.h"
+
+#define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=systemd"
+#define SYSTEMD_CGROUP_CONTROLLER_HYBRID "name=unified"
+#define SYSTEMD_CGROUP_CONTROLLER "_systemd"
+
+/* An enum of well known cgroup controllers */
+/* The values double as bit positions for CGROUP_CONTROLLER_TO_MASK() and as indexes into per-controller tables. */
+typedef enum CGroupController {
+ /* Original cgroup controllers */
+ CGROUP_CONTROLLER_CPU,
+ CGROUP_CONTROLLER_CPUACCT, /* v1 only */
+ CGROUP_CONTROLLER_IO, /* v2 only */
+ CGROUP_CONTROLLER_BLKIO, /* v1 only */
+ CGROUP_CONTROLLER_MEMORY,
+ CGROUP_CONTROLLER_DEVICES, /* v1 only */
+ CGROUP_CONTROLLER_PIDS,
+
+ /* BPF-based pseudo-controllers, v2 only */
+ CGROUP_CONTROLLER_BPF_FIREWALL,
+ CGROUP_CONTROLLER_BPF_DEVICES,
+
+ /* Number of controllers above; used as loop bound and table size */
+ _CGROUP_CONTROLLER_MAX,
+ /* Negative marker for "no such controller" */
+ _CGROUP_CONTROLLER_INVALID = -1,
+} CGroupController;
+
+/* Converts a CGroupController value into its single-bit mask */
+#define CGROUP_CONTROLLER_TO_MASK(c) (1U << (c))
+
+/* A bit mask of well known cgroup controllers */
+typedef enum CGroupMask {
+ /* One bit per controller, at the position given by its CGroupController value */
+ CGROUP_MASK_CPU = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPU),
+ CGROUP_MASK_CPUACCT = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUACCT),
+ CGROUP_MASK_IO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_IO),
+ CGROUP_MASK_BLKIO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BLKIO),
+ CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY),
+ CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES),
+ CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS),
+ CGROUP_MASK_BPF_FIREWALL = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL),
+ CGROUP_MASK_BPF_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_DEVICES),
+
+ /* All real cgroup v1 controllers */
+ CGROUP_MASK_V1 = CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT|CGROUP_MASK_BLKIO|CGROUP_MASK_MEMORY|CGROUP_MASK_DEVICES|CGROUP_MASK_PIDS,
+
+ /* All real cgroup v2 controllers */
+ CGROUP_MASK_V2 = CGROUP_MASK_CPU|CGROUP_MASK_IO|CGROUP_MASK_MEMORY|CGROUP_MASK_PIDS,
+
+ /* All cgroup v2 BPF pseudo-controllers */
+ CGROUP_MASK_BPF = CGROUP_MASK_BPF_FIREWALL|CGROUP_MASK_BPF_DEVICES,
+
+ /* Every bit below _CGROUP_CONTROLLER_MAX, i.e. all controllers listed above */
+ _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1
+} CGroupMask;
+
+static inline CGroupMask CGROUP_MASK_EXTEND_JOINED(CGroupMask mask) {
+        /* "cpu" and "cpuacct" are always mounted in the same hierarchy, hence if either bit is set in
+         * the mask, the returned mask has both set. Other bits pass through unchanged. */
+
+        if ((mask & (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT)) == 0)
+                return mask;
+
+        return mask | (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT);
+}
+
+CGroupMask get_cpu_accounting_mask(void);
+bool cpu_accounting_is_cheap(void);
+
+/* Special values for all weight knobs on unified hierarchy */
+#define CGROUP_WEIGHT_INVALID ((uint64_t) -1)
+#define CGROUP_WEIGHT_MIN UINT64_C(1)
+#define CGROUP_WEIGHT_MAX UINT64_C(10000)
+#define CGROUP_WEIGHT_DEFAULT UINT64_C(100)
+
+#define CGROUP_LIMIT_MIN UINT64_C(0)
+#define CGROUP_LIMIT_MAX ((uint64_t) -1)
+
+static inline bool CGROUP_WEIGHT_IS_OK(uint64_t x) {
+        /* Acceptable: the special "invalid" marker, or anything within the permitted range */
+        if (x == CGROUP_WEIGHT_INVALID)
+                return true;
+
+        return x >= CGROUP_WEIGHT_MIN && x <= CGROUP_WEIGHT_MAX;
+}
+
+/* IO limits on unified hierarchy */
+/* The values index cgroup_io_limit_defaults[] and the string lookup table in cgroup-util.c. */
+typedef enum CGroupIOLimitType {
+ CGROUP_IO_RBPS_MAX,
+ CGROUP_IO_WBPS_MAX,
+ CGROUP_IO_RIOPS_MAX,
+ CGROUP_IO_WIOPS_MAX,
+
+ /* Number of limit types above; used as table size */
+ _CGROUP_IO_LIMIT_TYPE_MAX,
+ /* Negative marker for "no such limit type" */
+ _CGROUP_IO_LIMIT_TYPE_INVALID = -1
+} CGroupIOLimitType;
+
+extern const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX];
+
+const char* cgroup_io_limit_type_to_string(CGroupIOLimitType t) _const_;
+CGroupIOLimitType cgroup_io_limit_type_from_string(const char *s) _pure_;
+
+/* Special values for the cpu.shares attribute */
+#define CGROUP_CPU_SHARES_INVALID ((uint64_t) -1)
+#define CGROUP_CPU_SHARES_MIN UINT64_C(2)
+#define CGROUP_CPU_SHARES_MAX UINT64_C(262144)
+#define CGROUP_CPU_SHARES_DEFAULT UINT64_C(1024)
+
+static inline bool CGROUP_CPU_SHARES_IS_OK(uint64_t x) {
+        /* Acceptable: the special "invalid" marker, or anything within the permitted range */
+        if (x == CGROUP_CPU_SHARES_INVALID)
+                return true;
+
+        return x >= CGROUP_CPU_SHARES_MIN && x <= CGROUP_CPU_SHARES_MAX;
+}
+
+/* Special values for the blkio.weight attribute */
+#define CGROUP_BLKIO_WEIGHT_INVALID ((uint64_t) -1)
+#define CGROUP_BLKIO_WEIGHT_MIN UINT64_C(10)
+#define CGROUP_BLKIO_WEIGHT_MAX UINT64_C(1000)
+#define CGROUP_BLKIO_WEIGHT_DEFAULT UINT64_C(500)
+
+static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
+        /* Acceptable: the special "invalid" marker, or anything within the permitted range */
+        if (x == CGROUP_BLKIO_WEIGHT_INVALID)
+                return true;
+
+        return x >= CGROUP_BLKIO_WEIGHT_MIN && x <= CGROUP_BLKIO_WEIGHT_MAX;
+}
+
+/* Default resource limits */
+#define DEFAULT_TASKS_MAX_PERCENTAGE 15U /* 15% of PIDs, 4915 on default settings */
+#define DEFAULT_USER_TASKS_MAX_PERCENTAGE 33U /* 33% of PIDs, 10813 on default settings */
+
+/* Detected (or default) cgroup layout. The values are ordered, and cgroup-util.c compares them with >=,
+ * e.g. ">= CGROUP_UNIFIED_NONE" is used to test for "already detected". */
+typedef enum CGroupUnified {
+ CGROUP_UNIFIED_UNKNOWN = -1,
+ CGROUP_UNIFIED_NONE = 0, /* Both systemd and controllers on legacy */
+ CGROUP_UNIFIED_SYSTEMD = 1, /* Only systemd on unified */
+ CGROUP_UNIFIED_ALL = 2, /* Both systemd and controllers on unified */
+} CGroupUnified;
+
+/*
+ * General rules:
+ *
+ * We accept named hierarchies in the syntax "foo" and "name=foo".
+ *
+ * We expect that named hierarchies do not conflict in name with a
+ * kernel hierarchy, modulo the "name=" prefix.
+ *
+ * We always generate "normalized" controller names, i.e. without the
+ * "name=" prefix.
+ *
+ * We require absolute cgroup paths. When returning, we will always
+ * generate paths with multiple adjacent / removed.
+ */
+
+int cg_enumerate_processes(const char *controller, const char *path, FILE **_f);
+int cg_read_pid(FILE *f, pid_t *_pid);
+int cg_read_event(const char *controller, const char *path, const char *event,
+ char **val);
+
+int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d);
+int cg_read_subgroup(DIR *d, char **fn);
+
+/* Bit flags taken by the cg_kill*() and cg_migrate*() calls declared below. NOTE(review): the exact effect of
+ * each flag is implemented in cgroup-util.c and is not visible from this header - confirm there. */
+typedef enum CGroupFlags {
+ CGROUP_SIGCONT = 1 << 0,
+ CGROUP_IGNORE_SELF = 1 << 1,
+ CGROUP_REMOVE = 1 << 2,
+} CGroupFlags;
+
+typedef void (*cg_kill_log_func_t)(pid_t pid, int sig, void *userdata);
+
+int cg_kill(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
+int cg_kill_recursive(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
+
+int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+
+int cg_split_spec(const char *spec, char **controller, char **path);
+int cg_mangle_path(const char *path, char **result);
+
+int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs);
+int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs);
+
+int cg_pid_get_path(const char *controller, pid_t pid, char **path);
+
+int cg_trim(const char *controller, const char *path, bool delete_root);
+
+int cg_rmdir(const char *controller, const char *path);
+
+int cg_create(const char *controller, const char *path);
+int cg_attach(const char *controller, const char *path, pid_t pid);
+int cg_attach_fallback(const char *controller, const char *path, pid_t pid);
+int cg_create_and_attach(const char *controller, const char *path, pid_t pid);
+
+int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value);
+int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret);
+int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, char **keys, char **values);
+
+int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid);
+
+int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags);
+int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size);
+
+int cg_install_release_agent(const char *controller, const char *agent);
+int cg_uninstall_release_agent(const char *controller);
+
+int cg_is_empty(const char *controller, const char *path);
+int cg_is_empty_recursive(const char *controller, const char *path);
+
+int cg_get_root_path(char **path);
+
+int cg_path_get_session(const char *path, char **session);
+int cg_path_get_owner_uid(const char *path, uid_t *uid);
+int cg_path_get_unit(const char *path, char **unit);
+int cg_path_get_user_unit(const char *path, char **unit);
+int cg_path_get_machine_name(const char *path, char **machine);
+int cg_path_get_slice(const char *path, char **slice);
+int cg_path_get_user_slice(const char *path, char **slice);
+
+int cg_shift_path(const char *cgroup, const char *cached_root, const char **shifted);
+int cg_pid_get_path_shifted(pid_t pid, const char *cached_root, char **cgroup);
+
+int cg_pid_get_session(pid_t pid, char **session);
+int cg_pid_get_owner_uid(pid_t pid, uid_t *uid);
+int cg_pid_get_unit(pid_t pid, char **unit);
+int cg_pid_get_user_unit(pid_t pid, char **unit);
+int cg_pid_get_machine_name(pid_t pid, char **machine);
+int cg_pid_get_slice(pid_t pid, char **slice);
+int cg_pid_get_user_slice(pid_t pid, char **slice);
+
+int cg_path_decode_unit(const char *cgroup, char **unit);
+
+char *cg_escape(const char *p);
+char *cg_unescape(const char *p) _pure_;
+
+bool cg_controller_is_valid(const char *p);
+
+int cg_slice_to_path(const char *unit, char **ret);
+
+typedef const char* (*cg_migrate_callback_t)(CGroupMask mask, void *userdata);
+
+int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path);
+int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata);
+int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata);
+int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata);
+int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root);
+int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask);
+
+int cg_mask_supported(CGroupMask *ret);
+int cg_mask_from_string(const char *s, CGroupMask *ret);
+int cg_mask_to_string(CGroupMask mask, char **ret);
+
+int cg_kernel_controllers(Set **controllers);
+
+bool cg_ns_supported(void);
+
+int cg_all_unified(void);
+int cg_hybrid_unified(void);
+int cg_unified_controller(const char *controller);
+int cg_unified_flush(void);
+
+bool cg_is_unified_wanted(void);
+bool cg_is_legacy_wanted(void);
+bool cg_is_hybrid_wanted(void);
+
+const char* cgroup_controller_to_string(CGroupController c) _const_;
+CGroupController cgroup_controller_from_string(const char *s) _pure_;
+
+int cg_weight_parse(const char *s, uint64_t *ret);
+int cg_cpu_shares_parse(const char *s, uint64_t *ret);
+int cg_blkio_weight_parse(const char *s, uint64_t *ret);
+
+bool is_cgroup_fs(const struct statfs *s);
+bool fd_is_cgroup_fs(int fd);
diff --git a/src/basic/chattr-util.c b/src/basic/chattr-util.c
new file mode 100644
index 0000000..235cfb9
--- /dev/null
+++ b/src/basic/chattr-util.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <linux/fs.h>
+
+#include "chattr-util.h"
+#include "fd-util.h"
+#include "macro.h"
+
+/* Updates the file attribute flags (as used by chattr(1)) of the inode referenced by 'fd'.
+ *
+ * Only the bits selected by 'mask' are altered; they are set to the corresponding bits of 'value'. If
+ * 'previous' is non-NULL it receives the flags as they were before the call.
+ *
+ * Returns 1 if the flags were changed, 0 if nothing had to be done, and a negative errno-style value on
+ * failure (-ENOTTY if the fd refers to neither a regular file nor a directory). */
+int chattr_fd(int fd, unsigned value, unsigned mask, unsigned *previous) {
+        unsigned current, wanted;
+        struct stat sb;
+        int changed;
+
+        assert(fd >= 0);
+
+        if (fstat(fd, &sb) < 0)
+                return -errno;
+
+        /* Refuse anything that is not a regular file or directory: on other node types (device nodes,
+         * fifos, …) the ioctl would be delivered to a driver instead of the file system, where the same
+         * ioctl number may mean something entirely different. Notably, DRM uses the same number. */
+        if (!(S_ISREG(sb.st_mode) || S_ISDIR(sb.st_mode)))
+                return -ENOTTY;
+
+        /* Nothing to modify and nobody wants to know the current flags: we are done. */
+        if (!previous && mask == 0)
+                return 0;
+
+        if (ioctl(fd, FS_IOC_GETFLAGS, &current) < 0)
+                return -errno;
+
+        wanted = (value & mask) | (current & ~mask);
+        changed = wanted != current;
+
+        /* Only issue the write ioctl if it would actually change something */
+        if (changed && ioctl(fd, FS_IOC_SETFLAGS, &wanted) < 0)
+                return -errno;
+
+        if (previous)
+                *previous = current;
+
+        return changed;
+}
+
+/* Path-based variant of chattr_fd(): opens 'p' (without following a trailing symlink) and applies the
+ * requested flag change to it.
+ *
+ * 'value', 'mask' and 'previous' have the same meaning as for chattr_fd(), and so does the return value;
+ * additionally any error of open() is returned as negative errno-style value. */
+int chattr_path(const char *p, unsigned value, unsigned mask, unsigned *previous) {
+        _cleanup_close_ int fd = -1;
+
+        assert(p);
+
+        /* Only take the shortcut if there is nothing to change *and* the caller does not want the current
+         * flags back. With mask == 0 but previous != NULL we still have to go through chattr_fd() so that
+         * *previous is actually filled in, matching the shortcut condition chattr_fd() itself uses. */
+        if (mask == 0 && !previous)
+                return 0;
+
+        fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+        if (fd < 0)
+                return -errno;
+
+        return chattr_fd(fd, value, mask, previous);
+}
+
+/* Reads the file attribute flags of the inode referenced by 'fd' into '*ret'.
+ *
+ * Returns 0 on success, -ENOTTY if the fd refers to neither a regular file nor a directory, or another
+ * negative errno-style value if fstat() or the ioctl fails. */
+int read_attr_fd(int fd, unsigned *ret) {
+        struct stat sb;
+
+        assert(fd >= 0);
+
+        if (fstat(fd, &sb) < 0)
+                return -errno;
+
+        /* Same node type restriction as in chattr_fd(): only regular files and directories qualify */
+        if (!(S_ISREG(sb.st_mode) || S_ISDIR(sb.st_mode)))
+                return -ENOTTY;
+
+        return ioctl(fd, FS_IOC_GETFLAGS, ret) < 0 ? -errno : 0;
+}
+
+/* Path-based variant of read_attr_fd(): opens 'p' read-only (not following a trailing symlink) and stores
+ * its file attribute flags in '*ret'. Returns 0 on success, negative errno-style value on failure. */
+int read_attr_path(const char *p, unsigned *ret) {
+        _cleanup_close_ int file_fd = -1;
+
+        assert(p);
+        assert(ret);
+
+        file_fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+
+        return file_fd < 0 ? -errno : read_attr_fd(file_fd, ret);
+}
diff --git a/src/basic/chattr-util.h b/src/basic/chattr-util.h
new file mode 100644
index 0000000..7570bba
--- /dev/null
+++ b/src/basic/chattr-util.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int chattr_fd(int fd, unsigned value, unsigned mask, unsigned *previous);
+int chattr_path(const char *p, unsigned value, unsigned mask, unsigned *previous);
+
+int read_attr_fd(int fd, unsigned *ret);
+int read_attr_path(const char *p, unsigned *ret);
diff --git a/src/basic/conf-files.c b/src/basic/conf-files.c
new file mode 100644
index 0000000..b70c6e5
--- /dev/null
+++ b/src/basic/conf-files.c
@@ -0,0 +1,325 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "conf-files.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "path-util.h"
+#include "set.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+
+/* Enumerates the directory 'path' (below 'root', if that is set) and adds every entry that passes the
+ * filters to the hashmap 'h', keyed by the entry's file name.
+ *
+ * 'suffix' (optional) restricts the entries to names ending in it. 'flags' is a combination of the
+ * CONF_FILES_xyz flags. 'masked' collects the names of mask entries and must be non-NULL when
+ * CONF_FILES_FILTER_MASKED is set.
+ *
+ * A missing directory is not an error. Returns 0 on success, negative errno-style value otherwise. */
+static int files_add(
+                Hashmap *h,
+                Set *masked,
+                const char *suffix,
+                const char *root,
+                unsigned flags,
+                const char *path) {
+
+        _cleanup_closedir_ DIR *dir = NULL;
+        const char *dirpath;
+        struct dirent *de;
+        int r;
+
+        assert(h);
+        assert((flags & CONF_FILES_FILTER_MASKED) == 0 || masked);
+        assert(path);
+
+        dirpath = prefix_roota(root, path);
+
+        dir = opendir(dirpath);
+        if (!dir && errno == ENOENT)
+                return 0;
+        if (!dir)
+                return log_debug_errno(errno, "Failed to open directory '%s': %m", dirpath);
+
+        FOREACH_DIRENT(de, dir, return -errno) {
+                const char *name = de->d_name;
+                char *value, *key;
+                struct stat st;
+
+                /* Wrong suffix? Then this entry is not for us. */
+                if (suffix && !endswith(name, suffix))
+                        continue;
+
+                /* An entry of the same name from an earlier directory takes precedence */
+                if (hashmap_contains(h, name)) {
+                        log_debug("Skipping overridden file '%s/%s'.", dirpath, name);
+                        continue;
+                }
+
+                /* An earlier directory carried a mask for this name */
+                if ((flags & CONF_FILES_FILTER_MASKED) && set_contains(masked, name)) {
+                        log_debug("File '%s/%s' is masked by previous entry.", dirpath, name);
+                        continue;
+                }
+
+                /* The metadata is only needed (and only valid below) if one of the filters that looks at it
+                 * was requested: mask detection, node type or executable bit. */
+                if ((flags & (CONF_FILES_FILTER_MASKED|CONF_FILES_REGULAR|CONF_FILES_DIRECTORY|CONF_FILES_EXECUTABLE)) &&
+                    fstatat(dirfd(dir), name, &st, 0) < 0) {
+                        log_debug_errno(errno, "Failed to stat '%s/%s', ignoring: %m", dirpath, name);
+                        continue;
+                }
+
+                /* Is the entry itself a mask? Then remember the name and skip it. */
+                if ((flags & CONF_FILES_FILTER_MASKED) && null_or_empty(&st)) {
+                        r = set_put_strdup(masked, name);
+                        if (r < 0)
+                                return r;
+
+                        log_debug("File '%s/%s' is a mask.", dirpath, name);
+                        continue;
+                }
+
+                /* Node type filter: accept the entry if it matches at least one of the requested types */
+                if ((flags & (CONF_FILES_REGULAR|CONF_FILES_DIRECTORY)) &&
+                    !(((flags & CONF_FILES_DIRECTORY) && S_ISDIR(st.st_mode)) ||
+                      ((flags & CONF_FILES_REGULAR) && S_ISREG(st.st_mode)))) {
+                        log_debug("Ignoring '%s/%s', as it is not a of the right type.", dirpath, name);
+                        continue;
+                }
+
+                /* Executable filter: we look at the mode bits rather than access(X_OK), since we care
+                 * whether the file is marked executable at all, not whether it is executable for us;
+                 * failures to actually execute it are something that should be logged about elsewhere. */
+                if ((flags & CONF_FILES_EXECUTABLE) && (st.st_mode & 0111) == 0) {
+                        log_debug("Ignoring '%s/%s', as it is not marked executable.", dirpath, name);
+                        continue;
+                }
+
+                /* The stored value is either the bare name or the full path; the key is always the name */
+                value = (flags & CONF_FILES_BASENAME) ? strdup(name) : strjoin(dirpath, "/", name);
+                if (!value)
+                        return -ENOMEM;
+
+                key = (flags & CONF_FILES_BASENAME) ? value : basename(value);
+
+                r = hashmap_put(h, key, value);
+                if (r < 0) {
+                        free(value);
+                        return log_debug_errno(r, "Failed to add item to hashmap: %m");
+                }
+
+                assert(r > 0);
+        }
+
+        return 0;
+}
+
+/* Sort comparator for arrays of paths: orders two paths by their last component only. */
+static int base_cmp(char * const *a, char * const *b) {
+        const char *left = basename(*a), *right = basename(*b);
+
+        return strcmp(left, right);
+}
+
+/* Common backend of the conf_files_list_xyz() calls: collects the matching entries of all directories in
+ * 'dirs' (earlier directories take precedence) and returns them in '*strv', sorted by file name.
+ *
+ * Note that 'dirs' is modified by this call. Returns 0 on success, -ENOMEM on allocation failure. Failures
+ * to read an individual directory other than -ENOMEM are logged at debug level and ignored. */
+static int conf_files_list_strv_internal(char ***strv, const char *suffix, const char *root, unsigned flags, char **dirs) {
+        _cleanup_hashmap_free_ Hashmap *found = NULL;
+        _cleanup_set_free_free_ Set *masked = NULL;
+        char **sorted, **d;
+        int r;
+
+        assert(strv);
+
+        /* This rewrites the caller's directory array in place */
+        if (!path_strv_resolve_uniq(dirs, root))
+                return -ENOMEM;
+
+        found = hashmap_new(&path_hash_ops);
+        if (!found)
+                return -ENOMEM;
+
+        /* The set of masked names is only needed if mask filtering was requested */
+        if (flags & CONF_FILES_FILTER_MASKED) {
+                masked = set_new(&path_hash_ops);
+                if (!masked)
+                        return -ENOMEM;
+        }
+
+        STRV_FOREACH(d, dirs) {
+                r = files_add(found, masked, suffix, root, flags, *d);
+                if (r >= 0)
+                        continue;
+                if (r == -ENOMEM)
+                        return r;
+
+                log_debug_errno(r, "Failed to search for files in %s, ignoring: %m", *d);
+        }
+
+        sorted = hashmap_get_strv(found);
+        if (!sorted)
+                return -ENOMEM;
+
+        typesafe_qsort(sorted, hashmap_size(found), base_cmp);
+        *strv = sorted;
+
+        return 0;
+}
+
+/* Inserts 'path' into the sorted list '*strv', which is expected to be ordered by base_cmp() (i.e. by
+ * file name). 'dirs' is the list of configuration directories in priority order, 'root' an optional prefix
+ * applied to the directories and to the inserted path.
+ *
+ * Returns 0 if the list was left untouched or an existing entry was replaced, the result of strv_insert()
+ * if a new entry was added, and a negative errno-style value on failure. */
+int conf_files_insert(char ***strv, const char *root, char **dirs, const char *path) {
+ /* Insert a path into strv, at the place honouring the usual sorting rules:
+ * - we first compare by the basename
+ * - and then we compare by dirname, allowing just one file with the given
+ * basename.
+ * This means that we will
+ * - add a new entry if basename(path) was not on the list,
+ * - do nothing if an entry with higher priority was already present,
+ * - do nothing if our new entry matches the existing entry,
+ * - replace the existing entry if our new entry has higher priority.
+ */
+ size_t i, n;
+ char *t;
+ int r;
+
+ n = strv_length(*strv);
+ for (i = 0; i < n; i++) {
+ int c;
+
+ /* Compare the file name of the i-th entry with the file name of the new path */
+ c = base_cmp((char* const*) *strv + i, (char* const*) &path);
+ if (c == 0) {
+ char **dir;
+
+ /* Oh, there already is an entry with a matching name (the last component). */
+
+ /* Walk the directories in priority order: whichever of the two paths is found below a
+ * directory first is the one with the higher priority. */
+ STRV_FOREACH(dir, dirs) {
+ _cleanup_free_ char *rdir = NULL;
+ char *p1, *p2;
+
+ /* The existing entries carry the root prefix, hence compare against the prefixed directory */
+ rdir = prefix_root(root, *dir);
+ if (!rdir)
+ return -ENOMEM;
+
+ p1 = path_startswith((*strv)[i], rdir);
+ if (p1)
+ /* Existing entry with higher priority
+ * or same priority, no need to do anything. */
+ return 0;
+
+ /* The new path is compared against the unprefixed directory */
+ p2 = path_startswith(path, *dir);
+ if (p2) {
+ /* Our new entry has higher priority */
+
+ t = prefix_root(root, path);
+ if (!t)
+ return log_oom();
+
+ return free_and_replace((*strv)[i], t);
+ }
+ }
+
+ /* Neither path is located below any of the directories: fall through and continue with
+ * the next list entry. */
+
+ } else if (c > 0)
+ /* Following files have lower priority, let's go insert our
+ * new entry. */
+ break;
+
+ /* … we are not there yet, let's continue */
+ }
+
+ /* The new file has lower priority than all the existing entries */
+ t = prefix_root(root, path);
+ if (!t)
+ return -ENOMEM;
+
+ /* Insert at the position the loop stopped at (the end of the list if it ran to completion) */
+ r = strv_insert(strv, i, t);
+ if (r < 0)
+ free(t);
+
+ return r;
+}
+
+/* Lists the configuration files found in the NULL-terminated string array 'dirs'. The result is returned
+ * in '*strv'; see conf_files_list_strv_internal() for the details. The passed array is not modified, we
+ * operate on a private copy. */
+int conf_files_list_strv(char ***strv, const char *suffix, const char *root, unsigned flags, const char* const* dirs) {
+        _cleanup_strv_free_ char **mutable_dirs = NULL;
+
+        assert(strv);
+
+        /* The backend alters the directory list, hence hand it a duplicate */
+        mutable_dirs = strv_copy((char**) dirs);
+        if (!mutable_dirs)
+                return -ENOMEM;
+
+        return conf_files_list_strv_internal(strv, suffix, root, flags, mutable_dirs);
+}
+
+/* Variadic front-end: the directories to search are passed as a NULL-terminated argument list starting
+ * with 'dir'. The result is returned in '*strv'. */
+int conf_files_list(char ***strv, const char *suffix, const char *root, unsigned flags, const char *dir, ...) {
+        _cleanup_strv_free_ char **dir_list = NULL;
+        va_list args;
+
+        assert(strv);
+
+        /* Turn the argument list into a freshly allocated string array */
+        va_start(args, dir);
+        dir_list = strv_new_ap(dir, args);
+        va_end(args);
+
+        if (!dir_list)
+                return -ENOMEM;
+
+        return conf_files_list_strv_internal(strv, suffix, root, flags, dir_list);
+}
+
+/* Front-end taking the directories to search as a "nulstr" in 'dirs', which is split up with
+ * strv_split_nulstr(). The result is returned in '*strv'. */
+int conf_files_list_nulstr(char ***strv, const char *suffix, const char *root, unsigned flags, const char *dirs) {
+        _cleanup_strv_free_ char **dir_list = NULL;
+
+        assert(strv);
+
+        dir_list = strv_split_nulstr(dirs);
+        if (!dir_list)
+                return -ENOMEM;
+
+        return conf_files_list_strv_internal(strv, suffix, root, flags, dir_list);
+}
+
+/* Enumerates all "*.conf" files below 'config_dirs' and, if 'replacement' is set, additionally inserts that
+ * path into the list at the position its priority demands.
+ *
+ * On success '*files' receives the list and, if 'replace_file' is non-NULL, '*replace_file' receives the
+ * root-prefixed replacement path (NULL if no replacement was given). Returns 0 on success, negative
+ * errno-style value on failure; failures are logged. */
+int conf_files_list_with_replacement(
+                const char *root,
+                char **config_dirs,
+                const char *replacement,
+                char ***files,
+                char **replace_file) {
+
+        _cleanup_strv_free_ char **list = NULL;
+        _cleanup_free_ char *prefixed = NULL;
+        int r;
+
+        assert(config_dirs);
+        assert(files);
+        assert(replace_file || !replacement);
+
+        r = conf_files_list_strv(&list, ".conf", root, 0, (const char* const*) config_dirs);
+        if (r < 0)
+                return log_error_errno(r, "Failed to enumerate config files: %m");
+
+        if (replacement) {
+                r = conf_files_insert(&list, root, config_dirs, replacement);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to extend config file list: %m");
+
+                prefixed = prefix_root(root, replacement);
+                if (!prefixed)
+                        return log_oom();
+        }
+
+        /* Everything worked, pass ownership of the results to the caller */
+        *files = TAKE_PTR(list);
+        if (replace_file)
+                *replace_file = TAKE_PTR(prefixed);
+
+        return 0;
+}
diff --git a/src/basic/conf-files.h b/src/basic/conf-files.h
new file mode 100644
index 0000000..55ab326
--- /dev/null
+++ b/src/basic/conf-files.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "macro.h"
+
+/* Flags for the conf_files_list_xyz() calls; they may be ORed together. */
+enum {
+ CONF_FILES_EXECUTABLE = 1 << 0, /* Only return entries that have at least one executable mode bit set */
+ CONF_FILES_REGULAR = 1 << 1, /* Accept regular files (may be combined with CONF_FILES_DIRECTORY) */
+ CONF_FILES_DIRECTORY = 1 << 2, /* Accept directories (may be combined with CONF_FILES_REGULAR) */
+ CONF_FILES_BASENAME = 1 << 3, /* Return bare file names instead of full paths */
+ CONF_FILES_FILTER_MASKED = 1 << 4, /* Treat entries detected as masks as hiding same-named entries of later directories */
+};
+
+int conf_files_list(char ***ret, const char *suffix, const char *root, unsigned flags, const char *dir, ...) _sentinel_;
+int conf_files_list_strv(char ***ret, const char *suffix, const char *root, unsigned flags, const char* const* dirs);
+int conf_files_list_nulstr(char ***ret, const char *suffix, const char *root, unsigned flags, const char *dirs);
+int conf_files_insert(char ***strv, const char *root, char **dirs, const char *path);
+int conf_files_list_with_replacement(
+ const char *root,
+ char **config_dirs,
+ const char *replacement,
+ char ***files,
+ char **replace_file);
diff --git a/src/basic/copy.c b/src/basic/copy.c
new file mode 100644
index 0000000..46e02a3
--- /dev/null
+++ b/src/basic/copy.c
@@ -0,0 +1,936 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/sendfile.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "mountpoint-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "xattr-util.h"
+
+#define COPY_BUFFER_SIZE (16U*1024U)
+
+/* A safety net for descending recursively into file system trees to copy. On Linux PATH_MAX is 4096, which means the
+ * deepest valid path one can build is around 2048, which we hence use as a safety net here, to not spin endlessly in
+ * case of bind mount cycles and suchlike. */
+#define COPY_DEPTH_MAX 2048U
+
+/* Thin wrapper around copy_file_range() that caches whether the call is available: once it failed with
+ * ENOSYS we don't bother issuing it again and return -ENOSYS right away.
+ *
+ * Returns the number of bytes copied, or a negative errno-style value on failure. */
+static ssize_t try_copy_file_range(
+                int fd_in, loff_t *off_in,
+                int fd_out, loff_t *off_out,
+                size_t len,
+                unsigned flags) {
+
+        static int supported = -1; /* < 0: not probed yet, 0: not available, > 0: available */
+        ssize_t copied;
+
+        if (supported == 0)
+                return -ENOSYS;
+
+        copied = copy_file_range(fd_in, off_in, fd_out, off_out, len, flags);
+
+        /* First invocation: anything but ENOSYS tells us that the call exists */
+        if (supported < 0)
+                supported = !(copied < 0 && errno == ENOSYS);
+
+        return copied < 0 ? -errno : copied;
+}
+
+/* Non-negative return values of fd_is_nonblock_pipe() */
+enum {
+ FD_IS_NO_PIPE, /* the fd does not refer to a pipe/FIFO at all */
+ FD_IS_BLOCKING_PIPE, /* the fd refers to a pipe/FIFO without O_NONBLOCK set */
+ FD_IS_NONBLOCKING_PIPE, /* the fd refers to a pipe/FIFO with O_NONBLOCK set */
+};
+
+/* Determines whether 'fd' refers to a pipe, and if it does whether O_NONBLOCK is set for it. Returns one
+ * of the FD_IS_xyz constants, or a negative errno-style value on failure. */
+static int fd_is_nonblock_pipe(int fd) {
+        struct stat sb;
+        int fl;
+
+        if (fstat(fd, &sb) < 0)
+                return -errno;
+
+        if (!S_ISFIFO(sb.st_mode))
+                return FD_IS_NO_PIPE;
+
+        fl = fcntl(fd, F_GETFL);
+        if (fl < 0)
+                return -errno;
+
+        if (FLAGS_SET(fl, O_NONBLOCK))
+                return FD_IS_NONBLOCKING_PIPE;
+
+        return FD_IS_BLOCKING_PIPE;
+}
+
+/* Copies up to 'max_bytes' bytes from 'fdf' to 'fdt', trying in this order: reflink (only if COPY_REFLINK
+ * is set), copy_file_range(), sendfile(), splice() and finally a plain read()/write() loop. If 'progress'
+ * is non-NULL it is invoked with the number of bytes transferred after each successful step of the loop; a
+ * negative return value of the callback aborts the copy and is passed through to the caller. */
+int copy_bytes_full(
+ int fdf, int fdt,
+ uint64_t max_bytes,
+ CopyFlags copy_flags,
+ void **ret_remains,
+ size_t *ret_remains_size,
+ copy_progress_bytes_t progress,
+ void *userdata) {
+
+ bool try_cfr = true, try_sendfile = true, try_splice = true;
+ int r, nonblock_pipe = -1;
+ size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */
+
+ assert(fdf >= 0);
+ assert(fdt >= 0);
+
+ /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum
+ * of 'max_bytes', which may be specified as UINT64_MAX, in which case no maximum is applied. Returns negative on
+ * error, zero if EOF is hit before the bytes limit is hit and positive otherwise. If the copy fails for some
+ * reason but we read but didn't yet write some data and ret_remains/ret_remains_size is not NULL, then it will
+ * be initialized with an allocated buffer containing this "remaining" data. Note that these two parameters are
+ * initialized with a valid buffer only on failure and only if there's actually data already read. Otherwise
+ * these parameters if non-NULL are set to NULL. */
+
+ if (ret_remains)
+ *ret_remains = NULL;
+ if (ret_remains_size)
+ *ret_remains_size = 0;
+
+ /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of
+ * source and destination first. */
+ if ((copy_flags & COPY_REFLINK)) {
+ off_t foffset;
+
+ foffset = lseek(fdf, 0, SEEK_CUR);
+ if (foffset >= 0) {
+ off_t toffset;
+
+ toffset = lseek(fdt, 0, SEEK_CUR);
+ if (toffset >= 0) {
+
+ if (foffset == 0 && toffset == 0 && max_bytes == UINT64_MAX)
+ r = btrfs_reflink(fdf, fdt); /* full file reflink */
+ else
+ r = btrfs_clone_range(fdf, foffset, fdt, toffset, max_bytes == UINT64_MAX ? 0 : max_bytes); /* partial reflink */
+ if (r >= 0) {
+ off_t t;
+
+ /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */
+ if (max_bytes == UINT64_MAX) {
+
+ /* We cloned to the end of the source file, let's position the read
+ * pointer there, and query it at the same time. */
+ t = lseek(fdf, 0, SEEK_END);
+ if (t < 0)
+ return -errno;
+ if (t < foffset)
+ return -ESPIPE;
+
+ /* Let's adjust the destination file write pointer by the same number
+ * of bytes. */
+ t = lseek(fdt, toffset + (t - foffset), SEEK_SET);
+ if (t < 0)
+ return -errno;
+
+ return 0; /* we copied the whole thing, hence hit EOF, return 0 */
+ } else {
+ t = lseek(fdf, foffset + max_bytes, SEEK_SET);
+ if (t < 0)
+ return -errno;
+
+ t = lseek(fdt, toffset + max_bytes, SEEK_SET);
+ if (t < 0)
+ return -errno;
+
+ return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */
+ }
+ }
+ /* If the reflink attempt failed we silently fall through to the regular copy loop below */
+ }
+ }
+ }
+
+ for (;;) {
+ ssize_t n;
+
+ /* max_bytes is unsigned, hence this checks whether the budget is used up */
+ if (max_bytes <= 0)
+ return 1; /* return > 0 if we hit the max_bytes limit */
+
+ /* Never request more in one step than we are still allowed to copy */
+ if (max_bytes != UINT64_MAX && m > max_bytes)
+ m = max_bytes;
+
+ /* First try copy_file_range(), unless we already tried */
+ if (try_cfr) {
+ n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);
+ if (n < 0) {
+ /* Only these errors are taken as "not usable for this pair of fds", everything else is fatal */
+ if (!IN_SET(n, -EINVAL, -ENOSYS, -EXDEV, -EBADF))
+ return n;
+
+ try_cfr = false;
+ /* use fallback below */
+ } else if (n == 0) /* EOF */
+ break;
+ else
+ /* Success! */
+ goto next;
+ }
+
+ /* Then try sendfile(), unless we already tried */
+ if (try_sendfile) {
+ n = sendfile(fdt, fdf, NULL, m);
+ if (n < 0) {
+ if (!IN_SET(errno, EINVAL, ENOSYS))
+ return -errno;
+
+ try_sendfile = false;
+ /* use fallback below */
+ } else if (n == 0) /* EOF */
+ break;
+ else
+ /* Success! */
+ goto next;
+ }
+
+ /* Then try splice, unless we already tried. */
+ if (try_splice) {
+
+ /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
+ * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
+ * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour here, and
+ * check if either of the specified fds are a pipe, and if so, let's pass the flag
+ * automatically, depending on O_NONBLOCK being set.
+ *
+ * Here's a twist though: when we use it to move data between two pipes of which one has
+ * O_NONBLOCK set and the other has not, then we have no individual control over O_NONBLOCK
+ * behaviour. Hence in that case we can't use splice() and still guarantee systematic
+ * O_NONBLOCK behaviour, hence don't. */
+
+ /* nonblock_pipe < 0 means we have not classified the two fds yet; this is done only once */
+ if (nonblock_pipe < 0) {
+ int a, b;
+
+ /* Check if either of these fds is a pipe, and if so non-blocking or not */
+ a = fd_is_nonblock_pipe(fdf);
+ if (a < 0)
+ return a;
+
+ b = fd_is_nonblock_pipe(fdt);
+ if (b < 0)
+ return b;
+
+ if ((a == FD_IS_NO_PIPE && b == FD_IS_NO_PIPE) ||
+ (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) ||
+ (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE))
+
+ /* splice() only works if one of the fds is a pipe. If neither is, let's skip
+ * this step right-away. As mentioned above, if one of the two fds refers to a
+ * blocking pipe and the other to a non-blocking pipe, we can't use splice()
+ * either, hence don't try either. This hence means we can only use splice() if
+ * either only one of the two fds is a pipe, or if both are pipes with the same
+ * nonblocking flag setting. */
+
+ try_splice = false;
+ else
+ nonblock_pipe = a == FD_IS_NONBLOCKING_PIPE || b == FD_IS_NONBLOCKING_PIPE;
+ }
+ }
+
+ /* Checked a second time, since the classification above might just have turned splice() off */
+ if (try_splice) {
+ n = splice(fdf, NULL, fdt, NULL, m, nonblock_pipe ? SPLICE_F_NONBLOCK : 0);
+ if (n < 0) {
+ if (!IN_SET(errno, EINVAL, ENOSYS))
+ return -errno;
+
+ try_splice = false;
+ /* use fallback below */
+ } else if (n == 0) /* EOF */
+ break;
+ else
+ /* Success! */
+ goto next;
+ }
+
+ /* As a fallback just copy bits by hand */
+ {
+ uint8_t buf[MIN(m, COPY_BUFFER_SIZE)], *p = buf;
+ ssize_t z;
+
+ n = read(fdf, buf, sizeof buf);
+ if (n < 0)
+ return -errno;
+ if (n == 0) /* EOF */
+ break;
+
+ /* z counts the bytes read but not yet written, p points to the first of them */
+ z = (size_t) n;
+ do {
+ ssize_t k;
+
+ k = write(fdt, p, z);
+ if (k < 0) {
+ /* Save the write error before memdup() below gets a chance to alter errno */
+ r = -errno;
+
+ /* Hand the data we read but could not write back to the caller, if requested */
+ if (ret_remains) {
+ void *copy;
+
+ copy = memdup(p, z);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret_remains = copy;
+ }
+
+ if (ret_remains_size)
+ *ret_remains_size = z;
+
+ return r;
+ }
+
+ assert(k <= z);
+ z -= k;
+ p += k;
+ } while (z > 0);
+ }
+
+ next:
+ /* At this point n is the (positive) number of bytes transferred in this iteration */
+ if (progress) {
+ r = progress(n, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ if (max_bytes != (uint64_t) -1) {
+ assert(max_bytes >= (uint64_t) n);
+ max_bytes -= n;
+ }
+
+ /* sendfile accepts at most SSIZE_MAX-offset bytes to copy,
+ * so reduce our maximum by the amount we already copied,
+ * but don't go below our copy buffer size, unless we are
+ * close to the limit of bytes we are allowed to copy. */
+ m = MAX(MIN(COPY_BUFFER_SIZE, max_bytes), m - n);
+ }
+
+ return 0; /* return 0 if we hit EOF earlier than the size limit */
+}
+
+/* Recreates the symlink 'from' (relative to directory fd 'df') as 'to' (relative to directory fd 'dt') and
+ * sets the ownership of the new link to that recorded in 'st', unless overridden by valid
+ * 'override_uid'/'override_gid'. Returns 0 on success, negative errno-style value on failure. */
+static int fd_copy_symlink(
+                int df,
+                const char *from,
+                const struct stat *st,
+                int dt,
+                const char *to,
+                uid_t override_uid,
+                gid_t override_gid,
+                CopyFlags copy_flags) {
+
+        _cleanup_free_ char *link_target = NULL;
+        uid_t uid;
+        gid_t gid;
+        int r;
+
+        assert(from);
+        assert(st);
+        assert(to);
+
+        r = readlinkat_malloc(df, from, &link_target);
+        if (r < 0)
+                return r;
+
+        if (symlinkat(link_target, dt, to) < 0)
+                return -errno;
+
+        uid = uid_is_valid(override_uid) ? override_uid : st->st_uid;
+        gid = gid_is_valid(override_gid) ? override_gid : st->st_gid;
+
+        /* Change the ownership of the link itself, not of whatever it points to */
+        return fchownat(dt, to, uid, gid, AT_SYMLINK_NOFOLLOW) < 0 ? -errno : 0;
+}
+
+/* Copies the regular file 'from' (relative to directory fd 'df') to the new file 'to' (relative to
+ * directory fd 'dt'), which must not exist yet. Contents, ownership (subject to the overrides), access
+ * mode, timestamps and extended attributes are carried over; the latter two on a best-effort basis.
+ *
+ * If copying the data or closing the new file fails the new file is removed again. Returns the
+ * non-negative result of copy_bytes_full() on success, negative errno-style value on failure. */
+static int fd_copy_regular(
+                int df,
+                const char *from,
+                const struct stat *st,
+                int dt,
+                const char *to,
+                uid_t override_uid,
+                gid_t override_gid,
+                CopyFlags copy_flags,
+                copy_progress_bytes_t progress,
+                void *userdata) {
+
+        _cleanup_close_ int fdf = -1, fdt = -1;
+        uid_t uid;
+        gid_t gid;
+        int r, q;
+
+        assert(from);
+        assert(st);
+        assert(to);
+
+        fdf = openat(df, from, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+        if (fdf < 0)
+                return -errno;
+
+        fdt = openat(dt, to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, st->st_mode & 07777);
+        if (fdt < 0)
+                return -errno;
+
+        r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress, userdata);
+        if (r < 0) {
+                (void) unlinkat(dt, to, 0);
+                return r;
+        }
+
+        /* Failures to adjust ownership or mode are remembered, but don't stop us from finishing up */
+        uid = uid_is_valid(override_uid) ? override_uid : st->st_uid;
+        gid = gid_is_valid(override_gid) ? override_gid : st->st_gid;
+
+        if (fchown(fdt, uid, gid) < 0)
+                r = -errno;
+
+        if (fchmod(fdt, st->st_mode & 07777) < 0)
+                r = -errno;
+
+        /* Timestamps and extended attributes are strictly best-effort */
+        (void) futimens(fdt, (struct timespec[2]) { st->st_atim, st->st_mtim });
+        (void) copy_xattr(fdf, fdt);
+
+        /* Close the destination explicitly so that we notice errors reported at close() time */
+        q = close(fdt);
+        fdt = -1;
+
+        if (q >= 0)
+                return r;
+
+        r = -errno;
+        (void) unlinkat(dt, to, 0);
+        return r;
+}
+
+/* Creates a FIFO 'to' (relative to directory fd 'dt') with the access mode and ownership recorded in 'st'
+ * (ownership subject to the overrides). Returns 0 on success, negative errno-style value on failure; if
+ * both the ownership and the mode change fail the error of the latter is returned. */
+static int fd_copy_fifo(
+                int df,
+                const char *from,
+                const struct stat *st,
+                int dt,
+                const char *to,
+                uid_t override_uid,
+                gid_t override_gid,
+                CopyFlags copy_flags) {
+
+        uid_t uid;
+        gid_t gid;
+        int r = 0;
+
+        assert(from);
+        assert(st);
+        assert(to);
+
+        if (mkfifoat(dt, to, st->st_mode & 07777) < 0)
+                return -errno;
+
+        uid = uid_is_valid(override_uid) ? override_uid : st->st_uid;
+        gid = gid_is_valid(override_gid) ? override_gid : st->st_gid;
+
+        if (fchownat(dt, to, uid, gid, AT_SYMLINK_NOFOLLOW) < 0)
+                r = -errno;
+
+        /* Apply the mode once more explicitly, after the node has been created */
+        if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
+                r = -errno;
+
+        return r;
+}
+
+/* Creates a file system node 'to' (relative to directory fd 'dt') with the type, mode and device number
+ * recorded in 'st', and sets its ownership (subject to the overrides) and access mode. Returns 0 on
+ * success, negative errno-style value on failure; if both the ownership and the mode change fail the error
+ * of the latter is returned. */
+static int fd_copy_node(
+                int df,
+                const char *from,
+                const struct stat *st,
+                int dt,
+                const char *to,
+                uid_t override_uid,
+                gid_t override_gid,
+                CopyFlags copy_flags) {
+
+        uid_t uid;
+        gid_t gid;
+        int r = 0;
+
+        assert(from);
+        assert(st);
+        assert(to);
+
+        if (mknodat(dt, to, st->st_mode, st->st_rdev) < 0)
+                return -errno;
+
+        uid = uid_is_valid(override_uid) ? override_uid : st->st_uid;
+        gid = gid_is_valid(override_gid) ? override_gid : st->st_gid;
+
+        if (fchownat(dt, to, uid, gid, AT_SYMLINK_NOFOLLOW) < 0)
+                r = -errno;
+
+        if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
+                r = -errno;
+
+        return r;
+}
+
+/* Recursively copies a directory tree.
+ *
+ * The source directory is 'from' relative to directory fd 'df', or 'df' itself if 'from' is NULL. The
+ * destination is 'to' relative to directory fd 'dt'. 'st' is the stat data of the source directory,
+ * 'original_device' the device the top-level source directory resides on (used for COPY_SAME_MOUNT), and
+ * 'depth_left' the remaining recursion budget. 'display_path' is the path of the source directory as
+ * reported to 'progress_path' (NULL for the top level).
+ *
+ * Failures to copy an individual entry do not abort the operation: the rest of the directory is still
+ * processed, and the most recent such error is returned at the end. Failures of the progress callback, of
+ * the mount point check and allocation failures abort immediately. Returns 0 on success, negative
+ * errno-style value otherwise. */
+static int fd_copy_directory(
+                int df,
+                const char *from,
+                const struct stat *st,
+                int dt,
+                const char *to,
+                dev_t original_device,
+                unsigned depth_left,
+                uid_t override_uid,
+                gid_t override_gid,
+                CopyFlags copy_flags,
+                const char *display_path,
+                copy_progress_path_t progress_path,
+                copy_progress_bytes_t progress_bytes,
+                void *userdata) {
+
+        _cleanup_close_ int fdf = -1, fdt = -1;
+        _cleanup_closedir_ DIR *d = NULL;
+        struct dirent *de;
+        bool exists, created;
+        int r;
+
+        assert(st);
+        assert(to);
+
+        if (depth_left == 0)
+                return -ENAMETOOLONG;
+
+        if (from)
+                fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+        else
+                fdf = fcntl(df, F_DUPFD_CLOEXEC, 3);
+        if (fdf < 0)
+                return -errno;
+
+        d = fdopendir(fdf);
+        if (!d)
+                return -errno;
+        fdf = -1; /* the fd is owned by the DIR object now */
+
+        exists = false;
+        if (copy_flags & COPY_MERGE_EMPTY) {
+                r = dir_is_empty_at(dt, to);
+                if (r < 0 && r != -ENOENT)
+                        return r;
+                else if (r == 1)
+                        exists = true;
+        }
+
+        if (exists)
+                created = false;
+        else {
+                r = mkdirat(dt, to, st->st_mode & 07777);
+                if (r >= 0)
+                        created = true;
+                else if (errno == EEXIST && (copy_flags & COPY_MERGE))
+                        created = false;
+                else
+                        return -errno;
+        }
+
+        fdt = openat(dt, to, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+        if (fdt < 0)
+                return -errno;
+
+        /* From here on 'r' only accumulates the errors of the individual entries. Results of helper calls
+         * inside the loop go into 'q', so that an error recorded for an earlier entry is not overwritten
+         * by the success of a later call. */
+        r = 0;
+
+        FOREACH_DIRENT_ALL(de, d, return -errno) {
+                const char *child_display_path = NULL;
+                _cleanup_free_ char *dp = NULL;
+                struct stat buf;
+                int q;
+
+                if (dot_or_dot_dot(de->d_name))
+                        continue;
+
+                if (fstatat(dirfd(d), de->d_name, &buf, AT_SYMLINK_NOFOLLOW) < 0) {
+                        r = -errno;
+                        continue;
+                }
+
+                if (progress_path) {
+                        if (display_path) {
+                                child_display_path = dp = strjoin(display_path, "/", de->d_name);
+                                if (!dp)
+                                        return -ENOMEM;
+                        } else
+                                child_display_path = de->d_name;
+
+                        q = progress_path(child_display_path, &buf, userdata);
+                        if (q < 0)
+                                return q;
+                }
+
+                if (S_ISDIR(buf.st_mode)) {
+                        /*
+                         * Don't descend into directories on other file systems, if this is requested. We do a simple
+                         * .st_dev check here, which basically comes for free. Note that we do this check only on
+                         * directories, not other kind of file system objects, for two reason:
+                         *
+                         * • The kernel's overlayfs pseudo file system that overlays multiple real file systems
+                         *   propagates the .st_dev field of the file system a file originates from all the way up
+                         *   through the stack to stat(). It doesn't do that for directories however. This means that
+                         *   comparing .st_dev on non-directories suggests that they all are mount points. To avoid
+                         *   confusion we hence avoid relying on this check for regular files.
+                         *
+                         * • The main reason we do this check at all is to protect ourselves from bind mount cycles,
+                         *   where we really want to avoid descending down in all eternity. However the .st_dev check
+                         *   is usually not sufficient for this protection anyway, as bind mount cycles from the same
+                         *   file system onto itself can't be detected that way. (Note we also do a recursion depth
+                         *   check, which is probably the better protection in this regard, which is why
+                         *   COPY_SAME_MOUNT is optional).
+                         */
+
+                        if (FLAGS_SET(copy_flags, COPY_SAME_MOUNT)) {
+                                if (buf.st_dev != original_device)
+                                        continue;
+
+                                q = fd_is_mount_point(dirfd(d), de->d_name, 0);
+                                if (q < 0)
+                                        return q;
+                                if (q > 0)
+                                        continue;
+                        }
+
+                        q = fd_copy_directory(dirfd(d), de->d_name, &buf, fdt, de->d_name, original_device, depth_left-1, override_uid, override_gid, copy_flags, child_display_path, progress_path, progress_bytes, userdata);
+                } else if (S_ISREG(buf.st_mode))
+                        q = fd_copy_regular(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, progress_bytes, userdata);
+                else if (S_ISLNK(buf.st_mode))
+                        q = fd_copy_symlink(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
+                else if (S_ISFIFO(buf.st_mode))
+                        q = fd_copy_fifo(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
+                else if (S_ISBLK(buf.st_mode) || S_ISCHR(buf.st_mode) || S_ISSOCK(buf.st_mode))
+                        q = fd_copy_node(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
+                else
+                        q = -EOPNOTSUPP;
+
+                /* When merging trees an already existing entry is not an error */
+                if (q == -EEXIST && (copy_flags & COPY_MERGE))
+                        q = 0;
+
+                if (q < 0)
+                        r = q;
+        }
+
+        /* Only touch the metadata of the destination directory if it was us who created it */
+        if (created) {
+                struct timespec ut[2] = {
+                        st->st_atim,
+                        st->st_mtim
+                };
+
+                if (fchown(fdt,
+                           uid_is_valid(override_uid) ? override_uid : st->st_uid,
+                           gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
+                        r = -errno;
+
+                if (fchmod(fdt, st->st_mode & 07777) < 0)
+                        r = -errno;
+
+                (void) copy_xattr(dirfd(d), fdt);
+                (void) futimens(fdt, ut);
+        }
+
+        return r;
+}
+
+/* Copies the file system object 'from' (relative to directory fd 'fdf') to 'to' (relative to directory fd
+ * 'fdt'), dispatching on the type of the source: regular files, directories (recursively), symlinks, FIFOs
+ * and device/socket nodes are supported, everything else results in -EOPNOTSUPP. A symlink at 'from' is
+ * copied as such, not followed. Returns the result of the respective helper, or a negative errno-style
+ * value if the source cannot be stat()ed. */
+int copy_tree_at_full(
+                int fdf,
+                const char *from,
+                int fdt,
+                const char *to,
+                uid_t override_uid,
+                gid_t override_gid,
+                CopyFlags copy_flags,
+                copy_progress_path_t progress_path,
+                copy_progress_bytes_t progress_bytes,
+                void *userdata) {
+
+        struct stat sb;
+
+        assert(from);
+        assert(to);
+
+        if (fstatat(fdf, from, &sb, AT_SYMLINK_NOFOLLOW) < 0)
+                return -errno;
+
+        /* The file types are mutually exclusive, hence the order of the checks does not matter */
+        if (S_ISDIR(sb.st_mode))
+                return fd_copy_directory(fdf, from, &sb, fdt, to, sb.st_dev, COPY_DEPTH_MAX, override_uid, override_gid, copy_flags, NULL, progress_path, progress_bytes, userdata);
+
+        if (S_ISREG(sb.st_mode))
+                return fd_copy_regular(fdf, from, &sb, fdt, to, override_uid, override_gid, copy_flags, progress_bytes, userdata);
+
+        if (S_ISLNK(sb.st_mode))
+                return fd_copy_symlink(fdf, from, &sb, fdt, to, override_uid, override_gid, copy_flags);
+
+        if (S_ISFIFO(sb.st_mode))
+                return fd_copy_fifo(fdf, from, &sb, fdt, to, override_uid, override_gid, copy_flags);
+
+        if (S_ISBLK(sb.st_mode) || S_ISCHR(sb.st_mode) || S_ISSOCK(sb.st_mode))
+                return fd_copy_node(fdf, from, &sb, fdt, to, override_uid, override_gid, copy_flags);
+
+        return -EOPNOTSUPP;
+}
+
+/* Recursively copies the directory referenced by the file descriptor 'dirfd' to the path 'to'. Ownership is
+ * taken from the source. Returns -ENOTDIR if the descriptor does not refer to a directory, otherwise the
+ * result of the recursive copy. */
+int copy_directory_fd_full(
+                int dirfd,
+                const char *to,
+                CopyFlags copy_flags,
+                copy_progress_path_t progress_path,
+                copy_progress_bytes_t progress_bytes,
+                void *userdata) {
+
+        struct stat sb;
+
+        assert(dirfd >= 0);
+        assert(to);
+
+        if (fstat(dirfd, &sb) < 0)
+                return -errno;
+
+        if (S_ISDIR(sb.st_mode))
+                /* A NULL source name makes the helper operate on (a duplicate of) the fd itself */
+                return fd_copy_directory(dirfd, NULL, &sb, AT_FDCWD, to, sb.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, progress_path, progress_bytes, userdata);
+
+        return -ENOTDIR;
+}
+
+/* Recursively copies the directory at path 'from' to the path 'to'. Ownership is taken from the source.
+ * 'from' is not followed if it is a symlink; -ENOTDIR is returned if it is not a directory. Otherwise
+ * returns the result of the recursive copy. */
+int copy_directory_full(
+                const char *from,
+                const char *to,
+                CopyFlags copy_flags,
+                copy_progress_path_t progress_path,
+                copy_progress_bytes_t progress_bytes,
+                void *userdata) {
+
+        struct stat sb;
+
+        assert(from);
+        assert(to);
+
+        if (lstat(from, &sb) < 0)
+                return -errno;
+
+        if (S_ISDIR(sb.st_mode))
+                return fd_copy_directory(AT_FDCWD, from, &sb, AT_FDCWD, to, sb.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, progress_path, progress_bytes, userdata);
+
+        return -ENOTDIR;
+}
+
+/* Copies the contents of the file at path 'from' into the already open file descriptor 'fdt'. Timestamps
+ * and extended attributes are copied as well, on a best-effort basis and regardless of whether copying the
+ * data worked. Returns the result of copy_bytes_full(), or a negative errno-style value if the source
+ * cannot be opened. */
+int copy_file_fd_full(
+                const char *from,
+                int fdt,
+                CopyFlags copy_flags,
+                copy_progress_bytes_t progress_bytes,
+                void *userdata) {
+
+        _cleanup_close_ int source_fd = -1;
+        int r;
+
+        assert(from);
+        assert(fdt >= 0);
+
+        source_fd = open(from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+        if (source_fd < 0)
+                return -errno;
+
+        r = copy_bytes_full(source_fd, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress_bytes, userdata);
+
+        /* Metadata is best-effort only, failures are deliberately ignored */
+        (void) copy_times(source_fd, fdt);
+        (void) copy_xattr(source_fd, fdt);
+
+        return r;
+}
+
+/* Copies the file at path 'from' to path 'to'. The destination is opened with 'flags' (O_WRONLY, O_CREAT,
+ * O_CLOEXEC and O_NOCTTY are always added) and created with access mode 'mode'. If 'chattr_flags' is
+ * non-zero these file attributes are set on the destination before the data is written (best-effort).
+ *
+ * If copying or closing the destination fails, the destination path is removed again. Returns 0 on
+ * success, negative errno-style value on failure. */
+int copy_file_full(
+ const char *from,
+ const char *to,
+ int flags,
+ mode_t mode,
+ unsigned chattr_flags,
+ CopyFlags copy_flags,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ int fdt = -1, r;
+
+ assert(from);
+ assert(to);
+
+ /* NOTE(review): RUN_WITH_UMASK presumably applies the given umask for the duration of the block, so
+ * that 'mode' is used unmodified, and restores it even on the early return — confirm in umask-util.h. */
+ RUN_WITH_UMASK(0000) {
+ fdt = open(to, flags|O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY, mode);
+ if (fdt < 0)
+ return -errno;
+ }
+
+ /* Setting the file attributes is best-effort, failures are ignored */
+ if (chattr_flags != 0)
+ (void) chattr_fd(fdt, chattr_flags, (unsigned) -1, NULL);
+
+ r = copy_file_fd_full(from, fdt, copy_flags, progress_bytes, userdata);
+ if (r < 0) {
+ close(fdt);
+ (void) unlink(to);
+ return r;
+ }
+
+ /* Close explicitly, so that errors reported at close() time are not lost */
+ if (close(fdt) < 0) {
+ /* NOTE(review): relies on unlink_noerrno() leaving errno (set by close()) untouched — confirm */
+ unlink_noerrno(to);
+ return -errno;
+ }
+
+ return 0;
+}
+
+/* Copies the file at path 'from' to path 'to' atomically: the data is first written to a temporary file,
+ * which is then moved into place, so that 'to' never shows up half-written.
+ *
+ * If COPY_REPLACE is set in 'copy_flags' an existing 'to' is replaced, otherwise the call fails if 'to'
+ * exists already. 'mode' is the access mode of the resulting file. If 'chattr_flags' is non-zero these file
+ * attributes are set on the new file before the data is written (best-effort).
+ *
+ * Returns 0 on success, negative errno-style value on failure. */
+int copy_file_atomic_full(
+                const char *from,
+                const char *to,
+                mode_t mode,
+                unsigned chattr_flags,
+                CopyFlags copy_flags,
+                copy_progress_bytes_t progress_bytes,
+                void *userdata) {
+
+        /* NOTE(review): judging by its name the cleanup handler unlinks the path stored in 't' (if any)
+         * and frees it when we leave the function — confirm in fs-util.h. */
+        _cleanup_(unlink_and_freep) char *t = NULL;
+        _cleanup_close_ int fdt = -1;
+        int r;
+
+        assert(from);
+        assert(to);
+
+        /* We try to use O_TMPFILE here to create the file if we can. Note that that only works if COPY_REPLACE is not
+         * set though as we need to use linkat() for linking the O_TMPFILE file into the file system but that system
+         * call can't replace existing files. Hence, if COPY_REPLACE is set we create a temporary name in the file
+         * system right-away and unconditionally which we then can renameat() to the right name after we completed
+         * writing it. */
+
+        if (copy_flags & COPY_REPLACE) {
+                r = tempfn_random(to, NULL, &t);
+                if (r < 0)
+                        return r;
+
+                fdt = open(t, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|O_WRONLY|O_CLOEXEC, 0600);
+                if (fdt < 0) {
+                        /* Pick up errno first: releasing the temporary name goes through free(), which is
+                         * not guaranteed to leave errno alone on older C libraries. */
+                        r = -errno;
+
+                        /* We never created the file, hence forget the name so that it is not removed on
+                         * return. */
+                        t = mfree(t);
+                        return r;
+                }
+        } else {
+                fdt = open_tmpfile_linkable(to, O_WRONLY|O_CLOEXEC, &t);
+                if (fdt < 0)
+                        return fdt;
+        }
+
+        /* Setting the file attributes is best-effort, failures are ignored */
+        if (chattr_flags != 0)
+                (void) chattr_fd(fdt, chattr_flags, (unsigned) -1, NULL);
+
+        r = copy_file_fd_full(from, fdt, copy_flags, progress_bytes, userdata);
+        if (r < 0)
+                return r;
+
+        if (fchmod(fdt, mode) < 0)
+                return -errno;
+
+        /* Now move the fully written file into place */
+        if (copy_flags & COPY_REPLACE) {
+                if (renameat(AT_FDCWD, t, AT_FDCWD, to) < 0)
+                        return -errno;
+        } else {
+                r = link_tmpfile(fdt, t, to);
+                if (r < 0)
+                        return r;
+        }
+
+        /* Success: the temporary name is gone (or was never there), make sure we don't remove anything */
+        t = mfree(t);
+        return 0;
+}
+
+/* Copies the access and modification timestamps of 'fdf' to 'fdt', and on a best-effort basis also the
+ * creation time as reported by fd_getcrtime(). Returns 0 on success, negative errno-style value if the
+ * source cannot be stat()ed or the timestamps cannot be applied. */
+int copy_times(int fdf, int fdt) {
+        usec_t birth = 0;
+        struct stat sb;
+
+        assert(fdf >= 0);
+        assert(fdt >= 0);
+
+        if (fstat(fdf, &sb) < 0)
+                return -errno;
+
+        if (futimens(fdt, (struct timespec[2]) { sb.st_atim, sb.st_mtim }) < 0)
+                return -errno;
+
+        /* No creation time available on the source? Then there is nothing more to do. */
+        if (fd_getcrtime(fdf, &birth) < 0)
+                return 0;
+
+        (void) fd_setcrtime(fdt, birth);
+        return 0;
+}
+
+/* Copies all extended attributes whose name starts with "user." from 'fdf' to 'fdt'. Attributes with
+ * other names are skipped.
+ *
+ * Returns 0 on success or if the source has no extended attributes. Failure to list or read an attribute
+ * aborts with a negative errno-style value. Failure to set an attribute on the destination does not abort:
+ * the remaining attributes are still processed and the error of the last such failure is returned. */
+int copy_xattr(int fdf, int fdt) {
+ _cleanup_free_ char *bufa = NULL, *bufb = NULL;
+ size_t sza = 100, szb = 100;
+ ssize_t n;
+ int ret = 0;
+ const char *p;
+
+ /* Read the list of attribute names into bufa, doubling the buffer size until the list fits */
+ for (;;) {
+ bufa = malloc(sza);
+ if (!bufa)
+ return -ENOMEM;
+
+ n = flistxattr(fdf, bufa, sza);
+ if (n == 0)
+ return 0;
+ if (n > 0)
+ break;
+ if (errno != ERANGE)
+ return -errno;
+
+ sza *= 2;
+
+ bufa = mfree(bufa);
+ }
+
+ /* bufa now holds n bytes of NUL-terminated names, one after the other */
+ p = bufa;
+ while (n > 0) {
+ size_t l;
+
+ l = strlen(p);
+ assert(l < (size_t) n);
+
+ if (startswith(p, "user.")) {
+ ssize_t m;
+
+ /* The value buffer is allocated lazily, and again after it was found too small */
+ if (!bufb) {
+ bufb = malloc(szb);
+ if (!bufb)
+ return -ENOMEM;
+ }
+
+ m = fgetxattr(fdf, p, bufb, szb);
+ if (m < 0) {
+ if (errno == ERANGE) {
+ /* Value buffer too small: double it and retry the same attribute. p and n
+ * are not advanced, since we skip the end of the loop body. */
+ szb *= 2;
+ bufb = mfree(bufb);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ /* Remember the failure, but carry on with the remaining attributes */
+ if (fsetxattr(fdt, p, bufb, m, 0) < 0)
+ ret = -errno;
+ }
+
+ /* Advance to the next name, skipping the terminating NUL byte */
+ p += l + 1;
+ n -= l + 1;
+ }
+
+ return ret;
+}
diff --git a/src/basic/copy.h b/src/basic/copy.h
new file mode 100644
index 0000000..f677021
--- /dev/null
+++ b/src/basic/copy.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+typedef enum CopyFlags {
+ COPY_REFLINK = 1 << 0, /* Try to reflink */
+ COPY_MERGE = 1 << 1, /* Merge existing trees with our new one to copy */
+ COPY_REPLACE = 1 << 2, /* Replace an existing file if there's one */
+ COPY_SAME_MOUNT = 1 << 3, /* Don't descend recursively into other file systems, across mount point boundaries */
+ COPY_MERGE_EMPTY = 1 << 4, /* Merge an existing, empty directory with our new tree to copy */
+} CopyFlags;
+
+typedef int (*copy_progress_bytes_t)(uint64_t n_bytes, void *userdata);
+typedef int (*copy_progress_path_t)(const char *path, const struct stat *st, void *userdata);
+
+int copy_file_fd_full(const char *from, int to, CopyFlags copy_flags, copy_progress_bytes_t progress, void *userdata);
+static inline int copy_file_fd(const char *from, int to, CopyFlags copy_flags) {
+ return copy_file_fd_full(from, to, copy_flags, NULL, NULL);
+}
+
+int copy_file_full(const char *from, const char *to, int open_flags, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags, copy_progress_bytes_t progress, void *userdata);
+static inline int copy_file(const char *from, const char *to, int open_flags, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags) {
+ return copy_file_full(from, to, open_flags, mode, chattr_flags, copy_flags, NULL, NULL);
+}
+
+int copy_file_atomic_full(const char *from, const char *to, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags, copy_progress_bytes_t progress, void *userdata);
+static inline int copy_file_atomic(const char *from, const char *to, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags) {
+ return copy_file_atomic_full(from, to, mode, chattr_flags, copy_flags, NULL, NULL);
+}
+
+int copy_tree_at_full(int fdf, const char *from, int fdt, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+static inline int copy_tree_at(int fdf, const char *from, int fdt, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags) {
+ return copy_tree_at_full(fdf, from, fdt, to, override_uid, override_gid, copy_flags, NULL, NULL, NULL);
+}
+static inline int copy_tree(const char *from, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags) {
+ return copy_tree_at_full(AT_FDCWD, from, AT_FDCWD, to, override_uid, override_gid, copy_flags, NULL, NULL, NULL);
+}
+
+int copy_directory_fd_full(int dirfd, const char *to, CopyFlags copy_flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+static inline int copy_directory_fd(int dirfd, const char *to, CopyFlags copy_flags) {
+ return copy_directory_fd_full(dirfd, to, copy_flags, NULL, NULL, NULL);
+}
+
+int copy_directory_full(const char *from, const char *to, CopyFlags copy_flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+static inline int copy_directory(const char *from, const char *to, CopyFlags copy_flags) {
+ return copy_directory_full(from, to, copy_flags, NULL, NULL, NULL);
+}
+
+int copy_bytes_full(int fdf, int fdt, uint64_t max_bytes, CopyFlags copy_flags, void **ret_remains, size_t *ret_remains_size, copy_progress_bytes_t progress, void *userdata);
+static inline int copy_bytes(int fdf, int fdt, uint64_t max_bytes, CopyFlags copy_flags) {
+ return copy_bytes_full(fdf, fdt, max_bytes, copy_flags, NULL, NULL, NULL, NULL);
+}
+
+int copy_times(int fdf, int fdt);
+int copy_xattr(int fdf, int fdt);
diff --git a/src/basic/def.h b/src/basic/def.h
new file mode 100644
index 0000000..5be018d
--- /dev/null
+++ b/src/basic/def.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "util.h"
+
+#define DEFAULT_TIMEOUT_USEC (90*USEC_PER_SEC)
+#define DEFAULT_RESTART_USEC (100*USEC_PER_MSEC)
+#define DEFAULT_CONFIRM_USEC (30*USEC_PER_SEC)
+
+#define DEFAULT_START_LIMIT_INTERVAL (10*USEC_PER_SEC)
+#define DEFAULT_START_LIMIT_BURST 5
+
+/* The default time after which exit-on-idle services exit. This
+ * should be kept lower than the watchdog timeout, because otherwise
+ * the watchdog pings will keep the loop busy. */
+#define DEFAULT_EXIT_USEC (30*USEC_PER_SEC)
+
+/* The default value for the net.unix.max_dgram_qlen sysctl */
+#define DEFAULT_UNIX_MAX_DGRAM_QLEN 512UL
+
+#define SIGNALS_CRASH_HANDLER SIGSEGV,SIGILL,SIGFPE,SIGBUS,SIGQUIT,SIGABRT
+#define SIGNALS_IGNORE SIGPIPE
+
+#if HAVE_SPLIT_USR
+#define KBD_KEYMAP_DIRS \
+ "/usr/share/keymaps/\0" \
+ "/usr/share/kbd/keymaps/\0" \
+ "/usr/lib/kbd/keymaps/\0" \
+ "/lib/kbd/keymaps/\0"
+#else
+#define KBD_KEYMAP_DIRS \
+ "/usr/share/keymaps/\0" \
+ "/usr/share/kbd/keymaps/\0" \
+ "/usr/lib/kbd/keymaps/\0"
+#endif
+
+/* Note that we use the new /run prefix here (instead of /var/run) since we require them to be aliases and that way we
+ * become independent of /var being mounted */
+#define DEFAULT_SYSTEM_BUS_ADDRESS "unix:path=/run/dbus/system_bus_socket"
+#define DEFAULT_USER_BUS_ADDRESS_FMT "unix:path=%s/bus"
+
+#define PLYMOUTH_SOCKET { \
+ .un.sun_family = AF_UNIX, \
+ .un.sun_path = "\0/org/freedesktop/plymouthd", \
+ }
+
+#define NOTIFY_FD_MAX 768
+#define NOTIFY_BUFFER_MAX PIPE_BUF
+
+#if HAVE_SPLIT_USR
+# define _CONF_PATHS_SPLIT_USR_NULSTR(n) "/lib/" n "\0"
+# define _CONF_PATHS_SPLIT_USR(n) , "/lib/" n
+#else
+# define _CONF_PATHS_SPLIT_USR_NULSTR(n)
+# define _CONF_PATHS_SPLIT_USR(n)
+#endif
+
+/* Return a nulstr for a standard cascade of configuration paths,
+ * suitable to pass to conf_files_list_nulstr() or config_parse_many_nulstr()
+ * to implement drop-in directories for extending configuration
+ * files. */
+#define CONF_PATHS_NULSTR(n) \
+ "/etc/" n "\0" \
+ "/run/" n "\0" \
+ "/usr/local/lib/" n "\0" \
+ "/usr/lib/" n "\0" \
+ _CONF_PATHS_SPLIT_USR_NULSTR(n)
+
+#define CONF_PATHS_STRV(n) \
+ STRV_MAKE( \
+ "/etc/" n, \
+ "/run/" n, \
+ "/usr/local/lib/" n, \
+ "/usr/lib/" n \
+ _CONF_PATHS_SPLIT_USR(n))
+
+#define HIGH_RLIMIT_MEMLOCK (1024ULL*1024ULL*64ULL)
diff --git a/src/basic/device-nodes.c b/src/basic/device-nodes.c
new file mode 100644
index 0000000..5fcdf24
--- /dev/null
+++ b/src/basic/device-nodes.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "device-nodes.h"
+#include "utf8.h"
+
+ /* Returns 1 if c may appear unescaped in a device node name, 0 otherwise.
+  *
+  * Allowed are ASCII digits and letters, the characters "#+-.:=@_", and any character listed in
+  * the optional NUL-terminated string 'white'. A NUL byte is never allowed. */
+ int whitelisted_char_for_devnode(char c, const char *white) {
+
+         /* strchr() treats the terminating NUL as part of the string it searches, so without this
+          * guard a NUL byte would be reported as allowed. */
+         if (c == '\0')
+                 return 0;
+
+         if ((c >= '0' && c <= '9') ||
+             (c >= 'A' && c <= 'Z') ||
+             (c >= 'a' && c <= 'z') ||
+             strchr("#+-.:=@_", c) != NULL ||
+             (white != NULL && strchr(white, c) != NULL))
+                 return 1;
+
+         return 0;
+ }
+
+ /* Encodes 'str' into 'str_enc' so that it is usable as a device node name.
+  *
+  * Multi-byte sequences accepted by utf8_encoded_valid_unichar() are copied verbatim; a backslash
+  * and every character rejected by whitelisted_char_for_devnode() is written as "\xNN"; all other
+  * characters are copied as-is. 'len' is the size of 'str_enc' in bytes, including room for the
+  * terminating NUL.
+  *
+  * Returns 0 on success, -EINVAL if either pointer is NULL or the output does not fit. */
+ int encode_devnode_name(const char *str, char *str_enc, size_t len) {
+         size_t i, j;
+
+         if (!str || !str_enc)
+                 return -EINVAL;
+
+         for (i = 0, j = 0; str[i] != '\0'; i++) {
+                 int seqlen;
+
+                 seqlen = utf8_encoded_valid_unichar(&str[i]);
+                 if (seqlen > 1) {
+
+                         if (len-j < (size_t)seqlen)
+                                 return -EINVAL;
+
+                         memcpy(&str_enc[j], &str[i], seqlen);
+                         j += seqlen;
+                         i += (seqlen-1);
+
+                 } else if (str[i] == '\\' || !whitelisted_char_for_devnode(str[i], NULL)) {
+
+                         /* sprintf() writes the four characters of "\xNN" plus a terminating NUL,
+                          * i.e. five bytes. Requiring only four free bytes let that NUL land one
+                          * byte past the end of the buffer. */
+                         if (len-j < 5)
+                                 return -EINVAL;
+
+                         sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
+                         j += 4;
+
+                 } else {
+                         if (len-j < 1)
+                                 return -EINVAL;
+
+                         str_enc[j] = str[i];
+                         j++;
+                 }
+         }
+
+         /* Room for the final terminator. */
+         if (len-j < 1)
+                 return -EINVAL;
+
+         str_enc[j] = '\0';
+         return 0;
+ }
diff --git a/src/basic/device-nodes.h b/src/basic/device-nodes.h
new file mode 100644
index 0000000..3840e6d
--- /dev/null
+++ b/src/basic/device-nodes.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stddef.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "stdio-util.h"
+
+int encode_devnode_name(const char *str, char *str_enc, size_t len);
+int whitelisted_char_for_devnode(char c, const char *additional);
+
+#define DEV_NUM_PATH_MAX \
+ (STRLEN("/dev/block/") + DECIMAL_STR_MAX(dev_t) + 1 + DECIMAL_STR_MAX(dev_t))
+#define xsprintf_dev_num_path(buf, type, devno) \
+ xsprintf(buf, "/dev/%s/%u:%u", type, major(devno), minor(devno))
diff --git a/src/basic/dirent-util.c b/src/basic/dirent-util.c
new file mode 100644
index 0000000..d1d2c0e
--- /dev/null
+++ b/src/basic/dirent-util.c
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include "dirent-util.h"
+#include "path-util.h"
+#include "string-util.h"
+
+ /* Fills in de->d_type if the directory entry was returned with DT_UNKNOWN, by stat()ing the entry
+  * relative to d without following symlinks. Returns 0 on success (including when the type was
+  * already known), or a negative errno if fstatat() fails. */
+ int dirent_ensure_type(DIR *d, struct dirent *de) {
+         struct stat st;
+
+         assert(d);
+         assert(de);
+
+         if (de->d_type != DT_UNKNOWN)
+                 return 0;
+
+         if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
+                 return -errno;
+
+         /* Translate the file-type bits of st_mode into the matching DT_* constant. */
+         switch (st.st_mode & S_IFMT) {
+         case S_IFREG:
+                 de->d_type = DT_REG;
+                 break;
+         case S_IFDIR:
+                 de->d_type = DT_DIR;
+                 break;
+         case S_IFLNK:
+                 de->d_type = DT_LNK;
+                 break;
+         case S_IFIFO:
+                 de->d_type = DT_FIFO;
+                 break;
+         case S_IFSOCK:
+                 de->d_type = DT_SOCK;
+                 break;
+         case S_IFCHR:
+                 de->d_type = DT_CHR;
+                 break;
+         case S_IFBLK:
+                 de->d_type = DT_BLK;
+                 break;
+         default:
+                 de->d_type = DT_UNKNOWN;
+                 break;
+         }
+
+         return 0;
+ }
+
+ /* Returns true if the entry is a regular file, a symlink or of unknown type, and its name is not
+  * rejected by hidden_or_backup_file(). */
+ bool dirent_is_file(const struct dirent *de) {
+         assert(de);
+
+         return IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN) &&
+                 !hidden_or_backup_file(de->d_name);
+ }
+
+ /* Returns true if the entry is a regular file, a symlink or of unknown type, its name does not
+  * start with a dot, and - when 'suffix' is non-NULL - its name ends in 'suffix'. */
+ bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix) {
+         assert(de);
+
+         if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN) || de->d_name[0] == '.')
+                 return false;
+
+         /* Without a suffix every remaining entry qualifies. */
+         return !suffix || endswith(de->d_name, suffix);
+ }
+
+ /* Like readdir(), but skips over entries for which dot_or_dot_dot() is true. Returns whatever
+  * readdir() returned for the first other entry, i.e. NULL at end of directory or on error. */
+ struct dirent* readdir_no_dot(DIR *dirp) {
+         struct dirent *entry;
+
+         do
+                 entry = readdir(dirp);
+         while (entry && dot_or_dot_dot(entry->d_name));
+
+         return entry;
+ }
diff --git a/src/basic/dirent-util.h b/src/basic/dirent-util.h
new file mode 100644
index 0000000..b1b8767
--- /dev/null
+++ b/src/basic/dirent-util.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <dirent.h>
+#include <errno.h>
+#include <stdbool.h>
+
+#include "macro.h"
+#include "path-util.h"
+
+int dirent_ensure_type(DIR *d, struct dirent *de);
+
+bool dirent_is_file(const struct dirent *de) _pure_;
+bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix) _pure_;
+
+struct dirent* readdir_no_dot(DIR *dirp);
+
+#define FOREACH_DIRENT(de, d, on_error) \
+ for (errno = 0, de = readdir(d);; errno = 0, de = readdir(d)) \
+ if (!de) { \
+ if (errno > 0) { \
+ on_error; \
+ } \
+ break; \
+ } else if (hidden_or_backup_file((de)->d_name)) \
+ continue; \
+ else
+
+#define FOREACH_DIRENT_ALL(de, d, on_error) \
+ for (errno = 0, de = readdir(d);; errno = 0, de = readdir(d)) \
+ if (!de) { \
+ if (errno > 0) { \
+ on_error; \
+ } \
+ break; \
+ } else
diff --git a/src/basic/env-file.c b/src/basic/env-file.c
new file mode 100644
index 0000000..7f10f9a
--- /dev/null
+++ b/src/basic/env-file.c
@@ -0,0 +1,564 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio_ext.h>
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "utf8.h"
+
+ /* Parses a shell-like "KEY=VALUE" environment file with a character-driven state machine and
+  * hands every assignment found to push().
+  *
+  * The contents are read from 'f' if it is non-NULL, otherwise from the file named 'fname'.
+  * 'userdata' and 'n_pushed' are passed through to push() untouched.
+  *
+  * Ownership: push() receives the heap-allocated value (possibly NULL). When push() returns >= 0
+  * this function forgets the pointer, i.e. push() has taken ownership; when push() returns a
+  * negative value the pointer is still held here and released by the _cleanup_free_ handler.
+  *
+  * Returns 0 on success, -ENOMEM on allocation failure, or the negative value returned by the
+  * read helper or by push(). */
+ static int parse_env_file_internal(
+ FILE *f,
+ const char *fname,
+ int (*push) (const char *filename, unsigned line,
+ const char *key, char *value, void *userdata, int *n_pushed),
+ void *userdata,
+ int *n_pushed) {
+
+ /* last_*_whitespace hold the index at which a run of trailing whitespace started, or
+  * (size_t) -1 if the last character seen was not whitespace. */
+ size_t key_alloc = 0, n_key = 0, value_alloc = 0, n_value = 0, last_value_whitespace = (size_t) -1, last_key_whitespace = (size_t) -1;
+ _cleanup_free_ char *contents = NULL, *key = NULL, *value = NULL;
+ unsigned line = 1;
+ char *p;
+ int r;
+
+ enum {
+ PRE_KEY,
+ KEY,
+ PRE_VALUE,
+ VALUE,
+ VALUE_ESCAPE,
+ SINGLE_QUOTE_VALUE,
+ DOUBLE_QUOTE_VALUE,
+ DOUBLE_QUOTE_VALUE_ESCAPE,
+ COMMENT,
+ COMMENT_ESCAPE
+ } state = PRE_KEY;
+
+ if (f)
+ r = read_full_stream(f, &contents, NULL);
+ else
+ r = read_full_file(fname, &contents, NULL);
+ if (r < 0)
+ return r;
+
+ for (p = contents; *p; p++) {
+ char c = *p;
+
+ switch (state) {
+
+ /* Before a key: skip whitespace, detect comment lines, otherwise start collecting a key. */
+ case PRE_KEY:
+ if (strchr(COMMENTS, c))
+ state = COMMENT;
+ else if (!strchr(WHITESPACE, c)) {
+ state = KEY;
+ last_key_whitespace = (size_t) -1;
+
+ /* +2: room for this character plus the NUL written when the key is finished */
+ if (!GREEDY_REALLOC(key, key_alloc, n_key+2))
+ return -ENOMEM;
+
+ key[n_key++] = c;
+ }
+ break;
+
+ /* Inside a key: a newline before any '=' discards the key collected so far. */
+ case KEY:
+ if (strchr(NEWLINE, c)) {
+ state = PRE_KEY;
+ line++;
+ n_key = 0;
+ } else if (c == '=') {
+ state = PRE_VALUE;
+ last_value_whitespace = (size_t) -1;
+ } else {
+ if (!strchr(WHITESPACE, c))
+ last_key_whitespace = (size_t) -1;
+ else if (last_key_whitespace == (size_t) -1)
+ last_key_whitespace = n_key;
+
+ if (!GREEDY_REALLOC(key, key_alloc, n_key+2))
+ return -ENOMEM;
+
+ key[n_key++] = c;
+ }
+
+ break;
+
+ /* After '=' or after a closing quote: whitespace is skipped, a newline ends the assignment. */
+ case PRE_VALUE:
+ if (strchr(NEWLINE, c)) {
+ state = PRE_KEY;
+ /* NOTE(review): line is incremented before push() is called, so push() is handed the
+  * number of the line following the assignment - confirm whether that is intended. */
+ line++;
+ key[n_key] = 0;
+
+ if (value)
+ value[n_value] = 0;
+
+ /* strip trailing whitespace from key */
+ if (last_key_whitespace != (size_t) -1)
+ key[last_key_whitespace] = 0;
+
+ r = push(fname, line, key, value, userdata, n_pushed);
+ if (r < 0)
+ return r;
+
+ /* push() succeeded and now owns the value; start afresh for the next assignment. */
+ n_key = 0;
+ value = NULL;
+ value_alloc = n_value = 0;
+
+ } else if (c == '\'')
+ state = SINGLE_QUOTE_VALUE;
+ else if (c == '"')
+ state = DOUBLE_QUOTE_VALUE;
+ else if (c == '\\')
+ state = VALUE_ESCAPE;
+ else if (!strchr(WHITESPACE, c)) {
+ state = VALUE;
+
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+
+ break;
+
+ /* Inside an unquoted value. */
+ case VALUE:
+ if (strchr(NEWLINE, c)) {
+ state = PRE_KEY;
+ line++;
+
+ key[n_key] = 0;
+
+ if (value)
+ value[n_value] = 0;
+
+ /* Chomp off trailing whitespace from value */
+ if (last_value_whitespace != (size_t) -1)
+ value[last_value_whitespace] = 0;
+
+ /* strip trailing whitespace from key */
+ if (last_key_whitespace != (size_t) -1)
+ key[last_key_whitespace] = 0;
+
+ r = push(fname, line, key, value, userdata, n_pushed);
+ if (r < 0)
+ return r;
+
+ n_key = 0;
+ value = NULL;
+ value_alloc = n_value = 0;
+
+ } else if (c == '\\') {
+ state = VALUE_ESCAPE;
+ last_value_whitespace = (size_t) -1;
+ } else {
+ if (!strchr(WHITESPACE, c))
+ last_value_whitespace = (size_t) -1;
+ else if (last_value_whitespace == (size_t) -1)
+ last_value_whitespace = n_value;
+
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+
+ break;
+
+ /* Character following a backslash in an unquoted value: taken literally. */
+ case VALUE_ESCAPE:
+ state = VALUE;
+
+ if (!strchr(NEWLINE, c)) {
+ /* Escaped newlines we eat up entirely */
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+ break;
+
+ /* Inside '...': everything up to the closing quote is taken literally. */
+ case SINGLE_QUOTE_VALUE:
+ if (c == '\'')
+ state = PRE_VALUE;
+ else {
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+
+ break;
+
+ /* Inside "...": backslash starts an escape, '"' ends the quoted section. */
+ case DOUBLE_QUOTE_VALUE:
+ if (c == '"')
+ state = PRE_VALUE;
+ else if (c == '\\')
+ state = DOUBLE_QUOTE_VALUE_ESCAPE;
+ else {
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+
+ break;
+
+ /* Escape inside "...": \" yields '"', an escaped newline is dropped, any other escape is kept
+  * verbatim including its backslash (hence +3 bytes below). */
+ case DOUBLE_QUOTE_VALUE_ESCAPE:
+ state = DOUBLE_QUOTE_VALUE;
+
+ if (c == '"') {
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+ value[n_value++] = '"';
+ } else if (!strchr(NEWLINE, c)) {
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+3))
+ return -ENOMEM;
+ value[n_value++] = '\\';
+ value[n_value++] = c;
+ }
+
+ break;
+
+ /* Comment: runs to the next newline that is not preceded by a backslash. */
+ case COMMENT:
+ if (c == '\\')
+ state = COMMENT_ESCAPE;
+ else if (strchr(NEWLINE, c)) {
+ state = PRE_KEY;
+ line++;
+ }
+ break;
+
+ case COMMENT_ESCAPE:
+ state = COMMENT;
+ break;
+ }
+ }
+
+ /* The input ended without a final newline while an assignment was still open: flush it. */
+ if (IN_SET(state,
+ PRE_VALUE,
+ VALUE,
+ VALUE_ESCAPE,
+ SINGLE_QUOTE_VALUE,
+ DOUBLE_QUOTE_VALUE,
+ DOUBLE_QUOTE_VALUE_ESCAPE)) {
+
+ key[n_key] = 0;
+
+ if (value)
+ value[n_value] = 0;
+
+ if (state == VALUE)
+ if (last_value_whitespace != (size_t) -1)
+ value[last_value_whitespace] = 0;
+
+ /* strip trailing whitespace from key */
+ if (last_key_whitespace != (size_t) -1)
+ key[last_key_whitespace] = 0;
+
+ r = push(fname, line, key, value, userdata, n_pushed);
+ if (r < 0)
+ return r;
+
+ value = NULL;
+ }
+
+ return 0;
+ }
+
+ /* Verifies that key and (if non-NULL) value are valid UTF-8. Returns 0 if so; otherwise logs an
+  * error containing an escaped copy of the offending string and returns the value produced by
+  * log_error_errno() for EINVAL. */
+ static int check_utf8ness_and_warn(
+                 const char *filename, unsigned line,
+                 const char *key, char *value) {
+
+         _cleanup_free_ char *escaped = NULL;
+
+         if (!utf8_is_valid(key)) {
+                 escaped = utf8_escape_invalid(key);
+                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                        "%s:%u: invalid UTF-8 in key '%s', ignoring.",
+                                        strna(filename), line, escaped);
+         }
+
+         /* A missing value has nothing to validate. */
+         if (!value || utf8_is_valid(value))
+                 return 0;
+
+         escaped = utf8_escape_invalid(value);
+         return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                "%s:%u: invalid UTF-8 value for key %s: '%s', ignoring.",
+                                strna(filename), line, key, escaped);
+ }
+
+ /* push() callback for parse_env_filev(): 'userdata' points to a va_list holding
+  * (const char *name, char **destination) pairs terminated by a NULL name.
+  *
+  * If 'key' equals one of the names, the string previously stored in the destination is freed,
+  * 'value' is stored there instead, *n_pushed is incremented (if non-NULL) and 1 is returned.
+  * If no name matches, 'value' is freed and 0 is returned. A negative value is returned (and
+  * 'value' left untouched) if the UTF-8 check fails. */
+ static int parse_env_file_push(
+                 const char *filename, unsigned line,
+                 const char *key, char *value,
+                 void *userdata,
+                 int *n_pushed) {
+
+         va_list args, *ap = userdata;
+         char **slot = NULL;
+         int r;
+
+         r = check_utf8ness_and_warn(filename, line, key, value);
+         if (r < 0)
+                 return r;
+
+         /* Walk a private copy so that the caller's list can be scanned again for the next key. */
+         va_copy(args, *ap);
+         for (;;) {
+                 const char *name;
+                 char **destination;
+
+                 name = va_arg(args, const char *);
+                 if (!name)
+                         break;
+
+                 destination = va_arg(args, char **);
+                 if (streq(key, name)) {
+                         slot = destination;
+                         break;
+                 }
+         }
+         va_end(args);
+
+         if (!slot) {
+                 free(value);
+                 return 0;
+         }
+
+         free(*slot);
+         *slot = value;
+
+         if (n_pushed)
+                 (*n_pushed)++;
+
+         return 1;
+ }
+
+ /* va_list flavour of parse_env_file(): parses the environment file and stores the values of the
+  * keys named in 'ap' into the destinations paired with them. Returns the number of values stored,
+  * or a negative value on failure. */
+ int parse_env_filev(
+                 FILE *f,
+                 const char *fname,
+                 va_list ap) {
+
+         int stored = 0, r;
+         va_list copy;
+
+         va_copy(copy, ap);
+         r = parse_env_file_internal(f, fname, parse_env_file_push, &copy, &stored);
+         va_end(copy);
+
+         return r < 0 ? r : stored;
+ }
+
+ /* Variadic front-end for parse_env_filev(); the argument list consists of
+  * (const char *name, char **destination) pairs and ends with a NULL name. */
+ int parse_env_file_sentinel(
+                 FILE *f,
+                 const char *fname,
+                 ...) {
+
+         va_list args;
+         int result;
+
+         va_start(args, fname);
+         result = parse_env_filev(f, fname, args);
+         va_end(args);
+
+         return result;
+ }
+
+ /* push() callback that stores "key=value" in the string list 'userdata' points to, via
+  * strv_env_replace().
+  *
+  * Returns 0 on success, in which case 'value' has been freed and *n_pushed (if non-NULL)
+  * incremented. On failure a negative value is returned and 'value' is left untouched. */
+ static int load_env_file_push(
+                 const char *filename, unsigned line,
+                 const char *key, char *value,
+                 void *userdata,
+                 int *n_pushed) {
+         char ***list = userdata;
+         char *assignment;
+         int r;
+
+         r = check_utf8ness_and_warn(filename, line, key, value);
+         if (r < 0)
+                 return r;
+
+         assignment = strjoin(key, "=", value);
+         if (!assignment)
+                 return -ENOMEM;
+
+         r = strv_env_replace(list, assignment);
+         if (r < 0) {
+                 /* The list did not take the string, so it is still ours to release. */
+                 free(assignment);
+                 return r;
+         }
+
+         if (n_pushed)
+                 (*n_pushed)++;
+
+         free(value);
+         return 0;
+ }
+
+ /* Parses an environment file into a newly allocated list of "key=value" strings, returned in
+  * *rl. Returns 0 on success; on failure returns a negative value and leaves *rl untouched. */
+ int load_env_file(FILE *f, const char *fname, char ***rl) {
+         char **list = NULL;
+         int r;
+
+         r = parse_env_file_internal(f, fname, load_env_file_push, &list, NULL);
+         if (r >= 0) {
+                 *rl = list;
+                 return 0;
+         }
+
+         strv_free(list);
+         return r;
+ }
+
+ /* push() callback that appends the key and then the value as two separate entries to the string
+  * list 'userdata' points to. The key is copied; a non-NULL value is handed over to the list
+  * as-is, a NULL value is represented by an empty string.
+  *
+  * Returns 0 on success (incrementing *n_pushed if non-NULL), a negative value on failure. */
+ static int load_env_file_push_pairs(
+                 const char *filename, unsigned line,
+                 const char *key, char *value,
+                 void *userdata,
+                 int *n_pushed) {
+         char ***list = userdata;
+         int r;
+
+         r = check_utf8ness_and_warn(filename, line, key, value);
+         if (r < 0)
+                 return r;
+
+         if (strv_extend(list, key) < 0)
+                 return -ENOMEM;
+
+         if (value) {
+                 r = strv_push(list, value);
+                 if (r < 0)
+                         return r;
+         } else if (strv_extend(list, "") < 0)
+                 return -ENOMEM;
+
+         if (n_pushed)
+                 (*n_pushed)++;
+
+         return 0;
+ }
+
+ /* Parses an environment file into a newly allocated string list in which keys and values
+  * alternate, returned in *rl. Returns 0 on success; on failure returns a negative value and
+  * leaves *rl untouched. */
+ int load_env_file_pairs(FILE *f, const char *fname, char ***rl) {
+         char **pairs = NULL;
+         int r;
+
+         r = parse_env_file_internal(f, fname, load_env_file_push_pairs, &pairs, NULL);
+         if (r >= 0) {
+                 *rl = pairs;
+                 return 0;
+         }
+
+         strv_free(pairs);
+         return r;
+ }
+
+ /* push() callback for merge_env_file(): expands variable references in 'value' against the
+  * environment collected so far (*userdata) and the process environment, then stores the
+  * resulting "key=value" assignment in that environment.
+  *
+  * Assignments without a value or with an invalid variable name are logged and skipped
+  * (returning 0).
+  *
+  * Ownership: as for every push() callback, 'value' is consumed only when a non-negative value is
+  * returned. On failure it is left untouched, because parse_env_file_internal() releases it
+  * itself in that case.
+  *
+  * Returns 0 on success or when the assignment was skipped, a negative value on failure. */
+ static int merge_env_file_push(
+                 const char *filename, unsigned line,
+                 const char *key, char *value,
+                 void *userdata,
+                 int *n_pushed) {
+
+         char ***env = userdata;
+         char *expanded_value;
+         int r;
+
+         assert(env);
+
+         if (!value) {
+                 log_error("%s:%u: invalid syntax (around \"%s\"), ignoring.", strna(filename), line, key);
+                 return 0;
+         }
+
+         if (!env_name_is_valid(key)) {
+                 log_error("%s:%u: invalid variable name \"%s\", ignoring.", strna(filename), line, key);
+                 free(value);
+                 return 0;
+         }
+
+         expanded_value = replace_env(value, *env,
+                                      REPLACE_ENV_USE_ENVIRONMENT|
+                                      REPLACE_ENV_ALLOW_BRACELESS|
+                                      REPLACE_ENV_ALLOW_EXTENDED);
+         if (!expanded_value)
+                 return -ENOMEM;
+
+         /* load_env_file_push() frees the string passed to it only on success. The original value
+          * must therefore stay alive until we know the outcome: freeing it up front would make the
+          * caller free it a second time on failure, and would leak the expanded copy. */
+         r = load_env_file_push(filename, line, key, expanded_value, env, n_pushed);
+         if (r < 0) {
+                 free(expanded_value);
+                 return r;
+         }
+
+         free(value);
+         return r;
+ }
+
+ /* Parses an environment file and merges its assignments into *env, expanding variable
+  * references in the values along the way. Returns 0 on success, a negative value on failure. */
+ int merge_env_file(
+                 char ***env,
+                 FILE *f,
+                 const char *fname) {
+
+         int r;
+
+         /* NOTE: unlike the other exported parsing functions, this one supports braceful and
+          * braceless variable expansions as well as "extended" substitutions. */
+         r = parse_env_file_internal(f, fname, merge_env_file_push, env, NULL);
+
+         return r;
+ }
+
+ /* Writes one "NAME=VALUE" assignment followed by a newline to f. The value is wrapped in double
+  * quotes, with the characters in SHELL_NEED_ESCAPE backslash-escaped, if it contains control
+  * characters, whitespace or characters from SHELL_NEED_QUOTES. A string without '=' is written
+  * unchanged. */
+ static void write_env_var(FILE *f, const char *v) {
+         const char *eq, *value;
+
+         eq = strchr(v, '=');
+         if (!eq) {
+                 /* Fallback */
+                 fputs_unlocked(v, f);
+                 fputc_unlocked('\n', f);
+                 return;
+         }
+
+         /* Emit the name together with the '='. */
+         value = eq + 1;
+         fwrite_unlocked(v, 1, value-v, f);
+
+         if (!string_has_cc(value, NULL) && !chars_intersect(value, WHITESPACE SHELL_NEED_QUOTES))
+                 fputs_unlocked(value, f);
+         else {
+                 const char *c;
+
+                 fputc_unlocked('"', f);
+
+                 for (c = value; *c; c++) {
+                         if (strchr(SHELL_NEED_ESCAPE, *c))
+                                 fputc_unlocked('\\', f);
+
+                         fputc_unlocked(*c, f);
+                 }
+
+                 fputc_unlocked('"', f);
+         }
+
+         fputc_unlocked('\n', f);
+ }
+
+ /* Writes the assignments in 'l' to 'fname' by filling a temporary file and renaming it over the
+  * destination. Returns 0 on success; on failure the temporary file is removed and a negative
+  * value is returned. */
+ int write_env_file(const char *fname, char **l) {
+         _cleanup_fclose_ FILE *f = NULL;
+         _cleanup_free_ char *temp_path = NULL;
+         char **entry;
+         int r;
+
+         assert(fname);
+
+         r = fopen_temporary(fname, &f, &temp_path);
+         if (r < 0)
+                 return r;
+
+         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+         (void) fchmod_umask(fileno(f), 0644);
+
+         STRV_FOREACH(entry, l)
+                 write_env_var(f, *entry);
+
+         r = fflush_and_check(f);
+         if (r < 0)
+                 goto fail;
+
+         if (rename(temp_path, fname) < 0) {
+                 r = -errno;
+                 goto fail;
+         }
+
+         return 0;
+
+ fail:
+         unlink(temp_path);
+         return r;
+ }
diff --git a/src/basic/env-file.h b/src/basic/env-file.h
new file mode 100644
index 0000000..e1ca195
--- /dev/null
+++ b/src/basic/env-file.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "macro.h"
+
+int parse_env_filev(FILE *f, const char *fname, va_list ap);
+int parse_env_file_sentinel(FILE *f, const char *fname, ...) _sentinel_;
+#define parse_env_file(f, fname, ...) parse_env_file_sentinel(f, fname, __VA_ARGS__, NULL)
+int load_env_file(FILE *f, const char *fname, char ***l);
+int load_env_file_pairs(FILE *f, const char *fname, char ***l);
+
+int merge_env_file(char ***env, FILE *f, const char *fname);
+
+int write_env_file(const char *fname, char **l);
diff --git a/src/basic/env-util.c b/src/basic/env-util.c
new file mode 100644
index 0000000..fd449dc
--- /dev/null
+++ b/src/basic/env-util.c
@@ -0,0 +1,752 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+#define VALID_CHARS_ENV_NAME \
+ DIGITS LETTERS \
+ "_"
+
+ /* Checks whether the first n bytes of e form a valid environment variable name: non-empty, not
+  * starting with a digit, not longer than ARG_MAX-2, and made up of VALID_CHARS_ENV_NAME only. */
+ static bool env_name_is_valid_n(const char *e, size_t n) {
+         size_t i;
+
+         if (!e || n <= 0)
+                 return false;
+
+         if (e[0] >= '0' && e[0] <= '9')
+                 return false;
+
+         /* POSIX limits the whole environment block to ARG_MAX, so a single assignment cannot be
+          * larger either. Subtracting the equal sign and the trailing NUL leaves ARG_MAX-2 as the
+          * longest possible variable name. */
+         if (n > (size_t) sysconf(_SC_ARG_MAX) - 2)
+                 return false;
+
+         for (i = 0; i < n; i++)
+                 if (!strchr(VALID_CHARS_ENV_NAME, e[i]))
+                         return false;
+
+         return true;
+ }
+
+ /* Returns true if e is a non-NULL, valid environment variable name. */
+ bool env_name_is_valid(const char *e) {
+         return e && env_name_is_valid_n(e, strlen(e));
+ }
+
+ /* Returns true if e is acceptable as the value of an environment variable: non-NULL, valid
+  * UTF-8, free of control characters other than tab and newline, and at most ARG_MAX-3 bytes. */
+ bool env_value_is_valid(const char *e) {
+         if (!e || !utf8_is_valid(e))
+                 return false;
+
+         /* bash permits tabs and newlines in environment variables, hence so do we */
+         if (string_has_cc(e, "\t\n"))
+                 return false;
+
+         /* POSIX limits the whole environment block to ARG_MAX, so a single assignment cannot be
+          * larger either. Subtracting the shortest possible name (one character), the equal sign
+          * and the trailing NUL leaves ARG_MAX-3 as the longest possible value. */
+         return strlen(e) <= (size_t) sysconf(_SC_ARG_MAX) - 3;
+ }
+
+ /* Returns true if e is a "NAME=VALUE" string whose name and value are both valid and whose
+  * total length does not exceed ARG_MAX-1. */
+ bool env_assignment_is_valid(const char *e) {
+         const char *separator;
+
+         separator = strchr(e, '=');
+         if (!separator)
+                 return false;
+
+         if (!env_name_is_valid_n(e, separator - e) || !env_value_is_valid(separator + 1))
+                 return false;
+
+         /* POSIX limits the whole environment block to ARG_MAX, so a single assignment cannot be
+          * larger either; leave room for one trailing NUL byte. */
+         return strlen(e) <= (size_t) sysconf(_SC_ARG_MAX) - 1;
+ }
+
+ /* Returns true if every entry of e is a valid assignment and no variable is assigned twice. */
+ bool strv_env_is_valid(char **e) {
+         char **entry, **later;
+
+         if (!e)
+                 return true;
+
+         for (entry = e; *entry; entry++) {
+                 size_t name_len;
+
+                 if (!env_assignment_is_valid(*entry))
+                         return false;
+
+                 /* Look for a later assignment to the same variable name */
+                 name_len = strcspn(*entry, "=");
+                 for (later = entry + 1; *later; later++)
+                         if (strneq(*entry, *later, name_len) && (*later)[name_len] == '=')
+                                 return false;
+         }
+
+         return true;
+ }
+
+ /* Returns true if every entry of l is a valid variable name and no entry occurs twice. */
+ bool strv_env_name_is_valid(char **l) {
+         char **name;
+
+         STRV_FOREACH(name, l)
+                 if (!env_name_is_valid(*name) || strv_contains(name + 1, *name))
+                         return false;
+
+         return true;
+ }
+
+ /* Returns true if every entry of l is either a valid assignment or a valid variable name, and
+  * no entry occurs twice. */
+ bool strv_env_name_or_assignment_is_valid(char **l) {
+         char **item;
+
+         STRV_FOREACH(item, l) {
+                 if (!(env_assignment_is_valid(*item) || env_name_is_valid(*item)))
+                         return false;
+
+                 if (strv_contains(item + 1, *item))
+                         return false;
+         }
+
+         return true;
+ }
+
+ /* Merges the entries of 'a' into the strv starting at 'r'.
+  *
+  * 'r' points to the first entry of the destination, '*k' to its terminating NULL entry; '*k' is
+  * advanced as entries are appended. Each entry of 'a' is duplicated and either replaces the
+  * first existing entry starting with the same "NAME=" prefix or is appended at the end.
+  *
+  * The caller must have allocated enough room in 'r' for all entries of 'a'.
+  *
+  * Returns 0 on success (also when 'a' is NULL), -ENOMEM if duplicating an entry fails. */
+ static int env_append(char **r, char ***k, char **a) {
+         assert(r);
+         assert(k);
+         assert(*k >= r);
+
+         if (!a)
+                 return 0;
+
+         for (; *a; a++) {
+                 char **slot = r, *copy;
+                 size_t prefix;
+
+                 /* Compare up to and including the '=', if there is one */
+                 prefix = strcspn(*a, "=");
+                 if ((*a)[prefix] == '=')
+                         prefix++;
+
+                 while (slot < *k && !strneq(*slot, *a, prefix))
+                         slot++;
+
+                 copy = strdup(*a);
+                 if (!copy)
+                         return -ENOMEM;
+
+                 if (slot < *k) {
+                         /* Override the existing item */
+                         free(*slot);
+                         *slot = copy;
+                 } else {
+                         /* Append and keep the array NULL-terminated */
+                         **k = copy;
+                         (*k)++;
+                         **k = NULL;
+                 }
+         }
+
+         return 0;
+ }
+
+ /* Merges n_lists environment sets (each a char**) into one newly allocated set; assignments in
+  * later lists override those in earlier ones. Returns NULL on allocation failure. */
+ char **strv_env_merge(size_t n_lists, ...) {
+         _cleanup_strv_free_ char **merged = NULL;
+         size_t total = 0, i;
+         char **tail;
+         va_list ap;
+         int r = 0;
+
+         /* First pass: count the entries, which is an upper bound for the size of the result */
+         va_start(ap, n_lists);
+         for (i = 0; i < n_lists; i++)
+                 total += strv_length(va_arg(ap, char**));
+         va_end(ap);
+
+         merged = new(char*, total+1);
+         if (!merged)
+                 return NULL;
+
+         *merged = NULL;
+         tail = merged;
+
+         /* Second pass: fold the lists in one after the other, stopping at the first failure */
+         va_start(ap, n_lists);
+         for (i = 0; i < n_lists && r >= 0; i++)
+                 r = env_append(merged, &tail, va_arg(ap, char**));
+         va_end(ap);
+
+         if (r < 0)
+                 return NULL;
+
+         return TAKE_PTR(merged);
+ }
+
+ /* Tells whether the environment entry t is matched by pattern:
+  *
+  *   pattern a   matches entries a, a= and a=b
+  *   pattern a=  matches entry a= only
+  *   pattern a=b matches entry a=b only
+  *
+  * i.e. a pattern without '=' matches by variable name, a pattern with '=' only matches the
+  * identical string. */
+ static bool env_match(const char *t, const char *pattern) {
+         size_t l;
+
+         assert(t);
+         assert(pattern);
+
+         if (streq(t, pattern))
+                 return true;
+
+         /* A pattern carrying a value can only match literally, which was ruled out above */
+         if (strchr(pattern, '='))
+                 return false;
+
+         l = strlen(pattern);
+         return strneq(t, pattern, l) && t[l] == '=';
+ }
+
+ /* Returns true if entry has the form "<name>=...", i.e. it starts with name immediately
+  * followed by an equal sign. */
+ static bool env_entry_has_name(const char *entry, const char *name) {
+         const char *rest;
+
+         assert(entry);
+         assert(name);
+
+         rest = startswith(entry, name);
+         return rest && *rest == '=';
+ }
+
+ /* Returns a newly allocated copy of x that lacks every entry matched (per env_match()) by any
+  * entry of the n_lists string lists passed as variadic arguments. Returns NULL on allocation
+  * failure. */
+ char **strv_env_delete(char **x, size_t n_lists, ...) {
+         size_t n, kept = 0;
+         char **entry, **result;
+         va_list ap;
+
+         n = strv_length(x);
+
+         result = new(char*, n+1);
+         if (!result)
+                 return NULL;
+
+         STRV_FOREACH(entry, x) {
+                 bool drop = false;
+                 size_t v;
+
+                 /* Restart the argument list for every entry of x */
+                 va_start(ap, n_lists);
+                 for (v = 0; v < n_lists && !drop; v++) {
+                         char **list, **pattern;
+
+                         list = va_arg(ap, char**);
+                         STRV_FOREACH(pattern, list)
+                                 if (env_match(*entry, *pattern)) {
+                                         drop = true;
+                                         break;
+                                 }
+                 }
+                 va_end(ap);
+
+                 if (drop)
+                         continue;
+
+                 result[kept] = strdup(*entry);
+                 if (!result[kept]) {
+                         /* The NULL just stored terminates the partial array for strv_free() */
+                         strv_free(result);
+                         return NULL;
+                 }
+
+                 kept++;
+         }
+
+         result[kept] = NULL;
+
+         assert(kept <= n);
+
+         return result;
+ }
+
+ /* Removes (and frees) every entry of l that is matched by p per env_match(), compacting the list
+  * in place. Returns l, or NULL if l is NULL. */
+ char **strv_env_unset(char **l, const char *p) {
+         char **src, **dst;
+
+         if (!l)
+                 return NULL;
+
+         assert(p);
+
+         dst = l;
+         for (src = l; *src; src++) {
+                 if (env_match(*src, p))
+                         free(*src);
+                 else
+                         *(dst++) = *src;
+         }
+
+         *dst = NULL;
+         return l;
+ }
+
+ /* Like strv_env_unset(), but takes a NULL-terminated variadic list of patterns and removes every
+  * entry matched by any of them. Edits l in place and returns it (NULL if l is NULL). */
+ char **strv_env_unset_many(char **l, ...) {
+         char **src, **dst;
+
+         if (!l)
+                 return NULL;
+
+         for (src = dst = l; *src; src++) {
+                 const char *pattern;
+                 bool drop = false;
+                 va_list ap;
+
+                 /* Scan the patterns until one matches or the list ends */
+                 va_start(ap, l);
+                 while (!drop && (pattern = va_arg(ap, const char*)))
+                         drop = env_match(*src, pattern);
+                 va_end(ap);
+
+                 if (drop)
+                         free(*src);
+                 else
+                         *(dst++) = *src;
+         }
+
+         *dst = NULL;
+         return l;
+ }
+
+ /* Sets the variable assigned by p in the string list *l, editing the list in place.
+  *
+  * If the list already has an entry for the variable, the first such entry is replaced by p and
+  * all later entries for the same variable are dropped, so that the new value is the only one
+  * left. Otherwise p is appended (creating the list if necessary).
+  *
+  * p must be a "NAME=VALUE" string. It is not copied: on success the list takes ownership of it;
+  * on failure it remains owned by the caller.
+  *
+  * Returns 0 if an existing entry was replaced, 1 if p was appended, -EINVAL if p contains no
+  * '=', or the negative value returned by strv_push(). */
+ int strv_env_replace(char ***l, char *p) {
+         const char *t, *name;
+         char **f;
+         int r;
+
+         assert(p);
+
+         t = strchr(p, '=');
+         if (!t)
+                 return -EINVAL;
+
+         name = strndupa(p, t - p);
+
+         STRV_FOREACH(f, *l)
+                 if (env_entry_has_name(*f, name)) {
+                         free_and_replace(*f, p);
+
+                         /* Match the remaining entries by variable name. Passing the complete
+                          * "NAME=VALUE" string would only remove byte-identical duplicates and
+                          * leave stale assignments with a different value behind. */
+                         strv_env_unset(f + 1, name);
+                         return 0;
+                 }
+
+         /* We didn't find a match, we need to append p or create a new strv */
+         r = strv_push(l, p);
+         if (r < 0)
+                 return r;
+
+         return 1;
+ }
+
+ /* Returns a newly allocated copy of x in which the assignment p has been set, overriding an
+  * existing assignment of the same variable. Returns NULL on overflow or allocation failure. */
+ char **strv_env_set(char **x, const char *p) {
+         _cleanup_strv_free_ char **copy = NULL;
+         size_t n_old, n_slots;
+         char **tail;
+
+         /* Room for all old entries, the new one and the terminating NULL */
+         n_old = strv_length(x);
+         n_slots = n_old + 2;
+         if (n_slots < n_old) /* overflow? */
+                 return NULL;
+
+         copy = new(char*, n_slots);
+         if (!copy)
+                 return NULL;
+
+         *copy = NULL;
+         tail = copy;
+
+         if (env_append(copy, &tail, x) < 0 ||
+             env_append(copy, &tail, STRV_MAKE(p)) < 0)
+                 return NULL;
+
+         return TAKE_PTR(copy);
+ }
+
+ /* Looks up the variable whose name is given by the first k bytes of 'name' in the environment
+  * list l, searching from the end of the list. Returns a pointer to the value inside the list
+  * entry. If nothing is found and REPLACE_ENV_USE_ENVIRONMENT is set in flags, the result of
+  * getenv() for that name is returned instead. Returns NULL if k is 0 or nothing was found. */
+ char *strv_env_get_n(char **l, const char *name, size_t k, unsigned flags) {
+         const char *copy;
+         char **entry;
+
+         assert(name);
+
+         if (k <= 0)
+                 return NULL;
+
+         STRV_FOREACH_BACKWARDS(entry, l)
+                 if (strneq(*entry, name, k) &&
+                     (*entry)[k] == '=')
+                         return *entry + k + 1;
+
+         if (!(flags & REPLACE_ENV_USE_ENVIRONMENT))
+                 return NULL;
+
+         /* getenv() needs a NUL-terminated name, so make a copy of exactly k bytes */
+         copy = strndupa(name, k);
+         return getenv(copy);
+ }
+
+ /* Looks up the NUL-terminated variable name in the environment list l only (the process
+  * environment is not consulted). Returns a pointer to the value, or NULL if it is not set. */
+ char *strv_env_get(char **l, const char *name) {
+         size_t name_len;
+
+         assert(name);
+
+         name_len = strlen(name);
+         return strv_env_get_n(l, name, name_len, 0);
+ }
+
+ /* Cleans up the environment list e in place: invalid assignments are dropped (after calling
+  * invalid_callback for them, if one is given), and where a variable is assigned more than once
+  * only the last assignment is kept. Dropped entries are freed. Returns e. */
+ char **strv_env_clean_with_callback(char **e, void (*invalid_callback)(const char *p, void *userdata), void *userdata) {
+         char **entry, **later;
+         int kept = 0;
+
+         if (!e)
+                 return e;
+
+         for (entry = e; *entry; entry++) {
+                 bool keep = env_assignment_is_valid(*entry);
+
+                 if (!keep) {
+                         if (invalid_callback)
+                                 invalid_callback(*entry, userdata);
+                 } else {
+                         size_t name_len = strcspn(*entry, "=");
+
+                         /* Superseded by a later assignment to the same variable? */
+                         for (later = entry + 1; *later; later++)
+                                 if (strneq(*entry, *later, name_len) && (*later)[name_len] == '=') {
+                                         keep = false;
+                                         break;
+                                 }
+                 }
+
+                 if (keep)
+                         e[kept++] = *entry;
+                 else
+                         free(*entry);
+         }
+
+         e[kept] = NULL;
+
+         return e;
+ }
+
+ /* Expands variable references in the first n bytes of 'format' (or up to its NUL terminator,
+  * whichever comes first) and returns the result as a newly allocated string, or NULL on
+  * allocation failure.
+  *
+  * Variables are looked up with strv_env_get_n() in 'env'; 'flags' is passed on to it, so that
+  * REPLACE_ENV_USE_ENVIRONMENT additionally consults the process environment.
+  *
+  * Recognized syntax:
+  *   ${VAR}        always
+  *   $$            yields a single '$'
+  *   $VAR          only with REPLACE_ENV_ALLOW_BRACELESS
+  *   ${VAR:-word}  only with REPLACE_ENV_ALLOW_EXTENDED; 'word' is expanded and used if VAR is unset
+  *   ${VAR:+word}  only with REPLACE_ENV_ALLOW_EXTENDED; 'word' is expanded and used if VAR is set
+  *
+  * 'word' always points to the start of the input that has not been copied to the result yet. */
+ char *replace_env_n(const char *format, size_t n, char **env, unsigned flags) {
+ enum {
+ WORD,
+ CURLY,
+ VARIABLE,
+ VARIABLE_RAW,
+ TEST,
+ DEFAULT_VALUE,
+ ALTERNATE_VALUE,
+ } state = WORD;
+
+ /* test_value and len are only assigned in the TEST and VARIABLE states, which are always
+  * passed through before DEFAULT_VALUE/ALTERNATE_VALUE read them. */
+ const char *e, *word = format, *test_value;
+ char *k;
+ _cleanup_free_ char *r = NULL;
+ size_t i, len;
+ int nest = 0;
+
+ assert(format);
+
+ for (e = format, i = 0; *e && i < n; e ++, i ++)
+ switch (state) {
+
+ /* Plain text: wait for a '$' */
+ case WORD:
+ if (*e == '$')
+ state = CURLY;
+ break;
+
+ /* Character right after a '$' */
+ case CURLY:
+ if (*e == '{') {
+ /* Flush the text before the '$' (hence the -1) */
+ k = strnappend(r, word, e-word-1);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e-1;
+ state = VARIABLE;
+ nest++;
+ } else if (*e == '$') {
+ /* "$$": flush the text including the first '$' and skip the second one */
+ k = strnappend(r, word, e-word);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e+1;
+ state = WORD;
+
+ } else if (flags & REPLACE_ENV_ALLOW_BRACELESS && strchr(VALID_CHARS_ENV_NAME, *e)) {
+ k = strnappend(r, word, e-word-1);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e-1;
+ state = VARIABLE_RAW;
+
+ } else
+ state = WORD;
+ break;
+
+ /* Inside "${": word points at the '$', so the name starts at word+2 */
+ case VARIABLE:
+ if (*e == '}') {
+ const char *t;
+
+ t = strv_env_get_n(env, word+2, e-word-2, flags);
+
+ k = strappend(r, t);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e+1;
+ state = WORD;
+ } else if (*e == ':') {
+ if (!(flags & REPLACE_ENV_ALLOW_EXTENDED))
+ /* Treat this as unsupported syntax, i.e. do no replacement */
+ state = WORD;
+ else {
+ /* Remember the length of the variable name in front of the ':' */
+ len = e-word-2;
+ state = TEST;
+ }
+ }
+ break;
+
+ /* Character right after "${VAR:" selects the kind of substitution */
+ case TEST:
+ if (*e == '-')
+ state = DEFAULT_VALUE;
+ else if (*e == '+')
+ state = ALTERNATE_VALUE;
+ else {
+ state = WORD;
+ break;
+ }
+
+ test_value = e+1;
+ break;
+
+ /* Inside the word of ${VAR:-word} or ${VAR:+word}: track nested braces until the
+  * matching '}' is found */
+ case DEFAULT_VALUE: /* fall through */
+ case ALTERNATE_VALUE:
+ assert(flags & REPLACE_ENV_ALLOW_EXTENDED);
+
+ if (*e == '{') {
+ nest++;
+ break;
+ }
+
+ if (*e != '}')
+ break;
+
+ nest--;
+ if (nest == 0) {
+ const char *t;
+ _cleanup_free_ char *v = NULL;
+
+ t = strv_env_get_n(env, word+2, len, flags);
+
+ /* The word itself is expanded recursively; v keeps the result alive until it
+  * has been appended */
+ if (t && state == ALTERNATE_VALUE)
+ t = v = replace_env_n(test_value, e-test_value, env, flags);
+ else if (!t && state == DEFAULT_VALUE)
+ t = v = replace_env_n(test_value, e-test_value, env, flags);
+
+ k = strappend(r, t);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e+1;
+ state = WORD;
+ }
+ break;
+
+ /* Inside a braceless "$VAR": word points at the '$', the name starts at word+1 and ends at
+  * the first character that cannot be part of a name */
+ case VARIABLE_RAW:
+ assert(flags & REPLACE_ENV_ALLOW_BRACELESS);
+
+ if (!strchr(VALID_CHARS_ENV_NAME, *e)) {
+ const char *t;
+
+ t = strv_env_get_n(env, word+1, e-word-1, flags);
+
+ k = strappend(r, t);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ /* Step back by one so that the terminating character is processed again in WORD state */
+ word = e--;
+ i--;
+ state = WORD;
+ }
+ break;
+ }
+
+ /* End of input: a braceless variable still being read is expanded, anything else that has
+  * not been copied yet is appended verbatim */
+ if (state == VARIABLE_RAW) {
+ const char *t;
+
+ assert(flags & REPLACE_ENV_ALLOW_BRACELESS);
+
+ t = strv_env_get_n(env, word+1, e-word-1, flags);
+ return strappend(r, t);
+ } else
+ return strnappend(r, word, e-word);
+ }
+
+ /* Returns a newly allocated copy of argv with variable references expanded against env.
+  *
+  * An argument of the form "$FOO" (a '$' not followed by '{' or '$') is replaced by the words
+  * obtained from splitting the value of FOO, or by nothing if FOO is not set in env. Every other
+  * argument is passed through replace_env() and stays a single word.
+  *
+  * Returns NULL on failure. */
+ char **replace_env_argv(char **argv, char **env) {
+         char **out, **arg;
+         size_t used = 0, capacity;
+
+         capacity = strv_length(argv);
+
+         out = new(char*, capacity+1);
+         if (!out)
+                 return NULL;
+
+         STRV_FOREACH(arg, argv) {
+                 char **split = NULL, **grown;
+                 const char *value;
+                 size_t n_split;
+
+                 if ((*arg)[0] != '$' || IN_SET((*arg)[1], '{', '$')) {
+                         /* If ${FOO} appears as part of a word, replace it by the variable as-is */
+                         out[used] = replace_env(*arg, env, 0);
+                         if (!out[used])
+                                 goto fail;
+
+                         used++;
+                         continue;
+                 }
+
+                 /* If $FOO appears as single word, replace it by the split up variable */
+                 value = strv_env_get(env, *arg+1);
+                 if (value &&
+                     strv_split_extract(&split, value, WHITESPACE, EXTRACT_RELAX|EXTRACT_QUOTES) < 0)
+                         goto fail;
+
+                 /* This one argument turns into n_split words */
+                 n_split = strv_length(split);
+                 capacity = capacity + n_split - 1;
+
+                 grown = reallocarray(out, capacity + 1, sizeof(char *));
+                 if (!grown) {
+                         strv_free(split);
+                         goto fail;
+                 }
+
+                 out = grown;
+                 if (split) {
+                         /* Move the strings over; only the array that held them is freed */
+                         memcpy(out + used, split, n_split * sizeof(char*));
+                         free(split);
+                 }
+
+                 used += n_split;
+         }
+
+         out[used] = NULL;
+         return out;
+
+ fail:
+         /* Terminate what has been collected so far so that it can be freed as a strv */
+         out[used] = NULL;
+         strv_free(out);
+         return NULL;
+ }
+
+ /* Reads the environment variable p and parses it with parse_boolean(). Returns -ENXIO if the
+  * variable is not set, otherwise the result of parse_boolean(). */
+ int getenv_bool(const char *p) {
+         const char *value = getenv(p);
+
+         return value ? parse_boolean(value) : -ENXIO;
+ }
+
+ /* Like getenv_bool(), but reads the variable with secure_getenv(). Returns -ENXIO if that
+  * yields no value, otherwise the result of parse_boolean(). */
+ int getenv_bool_secure(const char *p) {
+         const char *value = secure_getenv(p);
+
+         return value ? parse_boolean(value) : -ENXIO;
+ }
diff --git a/src/basic/env-util.h b/src/basic/env-util.h
new file mode 100644
index 0000000..d54f996
--- /dev/null
+++ b/src/basic/env-util.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include "macro.h"
+#include "string.h"
+
+bool env_name_is_valid(const char *e);
+bool env_value_is_valid(const char *e);
+bool env_assignment_is_valid(const char *e);
+
+enum { /* Flag bits for the "flags" argument of replace_env_n(), replace_env() and strv_env_get_n() */
+ REPLACE_ENV_USE_ENVIRONMENT = 1 << 0, /* NOTE(review): presumably lets lookups fall back to the process environment; confirm in strv_env_get_n() */
+ REPLACE_ENV_ALLOW_BRACELESS = 1 << 1, /* replace_env_n() asserts this bit is set whenever it parses a variable reference written without braces */
+ REPLACE_ENV_ALLOW_EXTENDED = 1 << 2, /* NOTE(review): semantics not visible here; confirm in replace_env_n() */
+};
+
+char *replace_env_n(const char *format, size_t n, char **env, unsigned flags);
+char **replace_env_argv(char **argv, char **env);
+
+static inline char *replace_env(const char *format, char **env, unsigned flags) {
+        /* Convenience wrapper around replace_env_n() for NUL-terminated format strings */
+        size_t n = strlen(format);
+
+        return replace_env_n(format, n, env, flags);
+}
+
+bool strv_env_is_valid(char **e);
+#define strv_env_clean(l) strv_env_clean_with_callback(l, NULL, NULL)
+char **strv_env_clean_with_callback(char **l, void (*invalid_callback)(const char *p, void *userdata), void *userdata);
+
+bool strv_env_name_is_valid(char **l);
+bool strv_env_name_or_assignment_is_valid(char **l);
+
+char **strv_env_merge(size_t n_lists, ...);
+char **strv_env_delete(char **x, size_t n_lists, ...); /* New copy */
+
+char **strv_env_set(char **x, const char *p); /* New copy ... */
+char **strv_env_unset(char **l, const char *p); /* In place ... */
+char **strv_env_unset_many(char **l, ...) _sentinel_;
+int strv_env_replace(char ***l, char *p); /* In place ... */
+
+char *strv_env_get_n(char **l, const char *name, size_t k, unsigned flags) _pure_;
+char *strv_env_get(char **x, const char *n) _pure_;
+
+int getenv_bool(const char *p);
+int getenv_bool_secure(const char *p);
diff --git a/src/basic/errno-list.c b/src/basic/errno-list.c
new file mode 100644
index 0000000..44cc570
--- /dev/null
+++ b/src/basic/errno-list.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "errno-list.h"
+#include "macro.h"
+
+static const struct errno_name* lookup_errno(register const char *str,
+ register GPERF_LEN_TYPE len);
+
+#include "errno-from-name.h"
+#include "errno-to-name.h"
+
+const char *errno_to_name(int id) {
+        unsigned idx;
+
+        /* Maps an errno value to its symbolic name. Both the positive form and the negated form
+         * (as returned by functions in this code base) are accepted. Returns NULL if the value is
+         * outside the table or has no name assigned.
+         *
+         * The magnitude is computed in unsigned arithmetic: negating INT_MIN as a signed int would
+         * overflow, which is undefined behaviour. */
+        idx = id < 0 ? -(unsigned) id : (unsigned) id;
+
+        if (idx >= ELEMENTSOF(errno_names))
+                return NULL;
+
+        return errno_names[idx];
+}
+
+int errno_from_name(const char *name) {
+        const struct errno_name *entry;
+
+        assert(name);
+
+        /* Reverse of errno_to_name(): looks the symbolic name up in the generated table and
+         * returns the (positive) errno value, or -EINVAL if the name is unknown. */
+        entry = lookup_errno(name, strlen(name));
+        if (entry) {
+                assert(entry->id > 0);
+                return entry->id;
+        }
+
+        return -EINVAL;
+}
diff --git a/src/basic/errno-list.h b/src/basic/errno-list.h
new file mode 100644
index 0000000..9c639b4
--- /dev/null
+++ b/src/basic/errno-list.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+/*
+ * MAX_ERRNO is defined as 4095 in linux/err.h
+ * We use the same value here.
+ */
+#define ERRNO_MAX 4095
+
+const char *errno_to_name(int id);
+int errno_from_name(const char *name);
+static inline bool errno_is_valid(int n) {
+        /* Valid errno values are in the range 1…ERRNO_MAX */
+        if (n <= 0)
+                return false;
+
+        return n <= ERRNO_MAX;
+}
diff --git a/src/basic/errno-to-name.awk b/src/basic/errno-to-name.awk
new file mode 100644
index 0000000..0878aba
--- /dev/null
+++ b/src/basic/errno-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{ # emit the opening of the generated C table before any input is read
+ print "static const char* const errno_names[] = { "
+}
+!/EDEADLOCK/ && !/EWOULDBLOCK/ && !/ENOTSUP/ { # skip these three names (presumably because they alias other errno values; verify)
+ printf " [%s] = \"%s\",\n", $1, $1 # designated initializer: the errno constant indexes its own name as a string
+}
+END{ # close the array once all input lines were processed
+ print "};"
+}
diff --git a/src/basic/escape.c b/src/basic/escape.c
new file mode 100644
index 0000000..5f71515
--- /dev/null
+++ b/src/basic/escape.c
@@ -0,0 +1,506 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "utf8.h"
+
+int cescape_char(char c, char *buf) {
+        char letter = 0;
+
+        /* Writes the C-style escaped form of c to buf and returns the number of characters
+         * written (1, 2 or 4). No NUL terminator is appended, and the caller must provide
+         * room for 4 characters. */
+
+        switch (c) {
+
+        case '\a':
+                letter = 'a';
+                break;
+        case '\b':
+                letter = 'b';
+                break;
+        case '\f':
+                letter = 'f';
+                break;
+        case '\n':
+                letter = 'n';
+                break;
+        case '\r':
+                letter = 'r';
+                break;
+        case '\t':
+                letter = 't';
+                break;
+        case '\v':
+                letter = 'v';
+                break;
+        case '\\':
+        case '"':
+        case '\'':
+                /* These escape to themselves, prefixed by a backslash */
+                letter = c;
+                break;
+
+        default:
+                break;
+        }
+
+        if (letter != 0) {
+                buf[0] = '\\';
+                buf[1] = letter;
+                return 2;
+        }
+
+        /* For special chars we prefer octal over hexadecimal encoding, simply because glib's
+         * g_strescape() does the same */
+        if ((c < ' ') || (c >= 127)) {
+                buf[0] = '\\';
+                buf[1] = octchar((unsigned char) c >> 6);
+                buf[2] = octchar((unsigned char) c >> 3);
+                buf[3] = octchar((unsigned char) c);
+                return 4;
+        }
+
+        buf[0] = c;
+        return 1;
+}
+
+char *cescape_length(const char *s, size_t n) {
+        char *escaped, *out;
+        size_t i;
+
+        assert(s || n == 0);
+
+        /* C style escaping of the first n characters of s, returned as a newly allocated
+         * NUL-terminated string (NULL on allocation failure). cunescape() undoes this. */
+
+        /* Every input character expands to at most 4 output characters */
+        escaped = new(char, n*4 + 1);
+        if (!escaped)
+                return NULL;
+
+        out = escaped;
+        for (i = 0; i < n; i++)
+                out += cescape_char(s[i], out);
+
+        *out = 0;
+
+        return escaped;
+}
+
+char *cescape(const char *s) {
+        size_t n;
+
+        assert(s);
+
+        /* cescape_length() for a NUL-terminated string */
+        n = strlen(s);
+        return cescape_length(s, n);
+}
+
+int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) {
+ int r = 1; /* number of characters of p consumed; single-letter escapes keep this default */
+
+ assert(p);
+ assert(ret);
+
+ /* Unescapes C style. Returns the unescaped character in ret.
+ * Sets *eight_bit to true if the escaped sequence either fits in
+ * one byte in UTF-8 or is a non-unicode literal byte and should
+ * instead be copied directly.
+ *
+ * p points at the character that selects the escape (the callers
+ * in this file pass the position right after the backslash).
+ * length is the number of characters available at p; passing
+ * (size_t) -1 disables all length checks.
+ *
+ * The return value is the number of characters of p that were
+ * consumed (1, 3, 5 or 9), or a negative value on failure: -EINVAL,
+ * or for \u and \U whatever negative value unhexchar() returned.
+ * *eight_bit is only ever written (to true) by the \x and octal
+ * cases; in all other cases it is left untouched.
+ */
+
+ if (length != (size_t) -1 && length < 1)
+ return -EINVAL;
+
+ switch (p[0]) {
+
+ case 'a':
+ *ret = '\a';
+ break;
+ case 'b':
+ *ret = '\b';
+ break;
+ case 'f':
+ *ret = '\f';
+ break;
+ case 'n':
+ *ret = '\n';
+ break;
+ case 'r':
+ *ret = '\r';
+ break;
+ case 't':
+ *ret = '\t';
+ break;
+ case 'v':
+ *ret = '\v';
+ break;
+ case '\\':
+ *ret = '\\';
+ break;
+ case '"':
+ *ret = '"';
+ break;
+ case '\'':
+ *ret = '\'';
+ break;
+
+ case 's':
+ /* This is an extension of the XDG syntax files */
+ *ret = ' ';
+ break;
+
+ case 'x': {
+ /* hexadecimal encoding */
+ int a, b;
+
+ if (length != (size_t) -1 && length < 3)
+ return -EINVAL;
+
+ a = unhexchar(p[1]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unhexchar(p[2]);
+ if (b < 0)
+ return -EINVAL;
+
+ /* Don't allow NUL bytes */
+ if (a == 0 && b == 0)
+ return -EINVAL;
+
+ *ret = (a << 4U) | b;
+ *eight_bit = true;
+ r = 3; /* 'x' plus two hex digits */
+ break;
+ }
+
+ case 'u': {
+ /* C++11 style 16bit unicode */
+
+ int a[4];
+ size_t i;
+ uint32_t c;
+
+ if (length != (size_t) -1 && length < 5)
+ return -EINVAL;
+
+ for (i = 0; i < 4; i++) {
+ a[i] = unhexchar(p[1 + i]);
+ if (a[i] < 0)
+ return a[i];
+ }
+
+ c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];
+
+ /* Don't allow 0 chars */
+ if (c == 0)
+ return -EINVAL;
+
+ *ret = c;
+ r = 5; /* 'u' plus four hex digits */
+ break;
+ }
+
+ case 'U': {
+ /* C++11 style 32bit unicode */
+
+ int a[8];
+ size_t i;
+ char32_t c;
+
+ if (length != (size_t) -1 && length < 9)
+ return -EINVAL;
+
+ for (i = 0; i < 8; i++) {
+ a[i] = unhexchar(p[1 + i]);
+ if (a[i] < 0)
+ return a[i];
+ }
+
+ c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
+ ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] << 8U) | ((uint32_t) a[6] << 4U) | (uint32_t) a[7];
+
+ /* Don't allow 0 chars */
+ if (c == 0)
+ return -EINVAL;
+
+ /* Don't allow invalid code points */
+ if (!unichar_is_valid(c))
+ return -EINVAL;
+
+ *ret = c;
+ r = 9; /* 'U' plus eight hex digits */
+ break;
+ }
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7': {
+ /* octal encoding */
+ int a, b, c;
+ char32_t m;
+
+ if (length != (size_t) -1 && length < 3)
+ return -EINVAL;
+
+ a = unoctchar(p[0]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unoctchar(p[1]);
+ if (b < 0)
+ return -EINVAL;
+
+ c = unoctchar(p[2]);
+ if (c < 0)
+ return -EINVAL;
+
+ /* don't allow NUL bytes */
+ if (a == 0 && b == 0 && c == 0)
+ return -EINVAL;
+
+ /* Don't allow bytes above 255 */
+ m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
+ if (m > 255)
+ return -EINVAL;
+
+ *ret = m;
+ *eight_bit = true;
+ r = 3; /* exactly three octal digits, the first of which is p[0] itself */
+ break;
+ }
+
+ default:
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
+        const char *in, *end;
+        char *buf, *out;
+        size_t n_prefix;
+
+        assert(s);
+        assert(ret);
+
+        /* Reverses C style escaping of the first "length" characters of s, optionally putting
+         * "prefix" in front of the result. On success the new NUL-terminated string is stored in
+         * *ret and its length (prefix included) is returned. On failure a negative value is
+         * returned and *ret is not touched. */
+
+        n_prefix = strlen_ptr(prefix);
+
+        buf = new(char, n_prefix+length+1);
+        if (!buf)
+                return -ENOMEM;
+
+        if (prefix)
+                memcpy(buf, prefix, n_prefix);
+
+        in = s;
+        end = s + length;
+        out = buf + n_prefix;
+
+        while (in < end) {
+                bool eight_bit = false;
+                size_t left;
+                char32_t u;
+                int k;
+
+                left = end - in;
+                assert(left > 0);
+
+                if (*in != '\\') {
+                        /* Not an escape sequence, take the character as it is */
+                        *(out++) = *(in++);
+                        continue;
+                }
+
+                if (left == 1) {
+                        /* A backslash at the very end of the input */
+                        if (!(flags & UNESCAPE_RELAX)) {
+                                free(buf);
+                                return -EINVAL;
+                        }
+
+                        *(out++) = *(in++);
+                        continue;
+                }
+
+                k = cunescape_one(in + 1, left - 1, &u, &eight_bit);
+                if (k < 0) {
+                        if (!(flags & UNESCAPE_RELAX)) {
+                                free(buf);
+                                return k;
+                        }
+
+                        /* Unknown escape sequence: keep the backslash, the following characters
+                         * are then processed as regular input */
+                        *(out++) = '\\';
+                        in++;
+                        continue;
+                }
+
+                /* Skip the backslash and the k characters cunescape_one() consumed */
+                in += k + 1;
+
+                if (eight_bit)
+                        /* A single byte, store it exactly as specified */
+                        *(out++) = u;
+                else
+                        /* A code point, store it as (possibly multi-byte) UTF-8 */
+                        out += utf8_encode_unichar(out, u);
+        }
+
+        *out = 0;
+
+        *ret = buf;
+        return out - buf;
+}
+
+int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
+        /* Unescape without any prefix */
+        const char *no_prefix = NULL;
+
+        return cunescape_length_with_prefix(s, length, no_prefix, flags, ret);
+}
+
+int cunescape(const char *s, UnescapeFlags flags, char **ret) {
+        /* Unescape a complete NUL-terminated string */
+        size_t length = strlen(s);
+
+        return cunescape_length(s, length, flags, ret);
+}
+
+char *xescape(const char *s, const char *bad) {
+        char *escaped, *out;
+        const char *in;
+
+        /* \xFF style escaping of the backslash, of all special chars and of every char listed in
+         * "bad". The result is newly allocated (NULL on allocation failure) and may be reversed
+         * with cunescape(). */
+
+        /* Each input character becomes at most 4 output characters */
+        escaped = new(char, strlen(s) * 4 + 1);
+        if (!escaped)
+                return NULL;
+
+        out = escaped;
+        for (in = s; *in; in++) {
+
+                if (!((*in < ' ') || (*in >= 127) ||
+                      (*in == '\\') || strchr(bad, *in))) {
+                        *(out++) = *in;
+                        continue;
+                }
+
+                *(out++) = '\\';
+                *(out++) = 'x';
+                *(out++) = hexchar(*in >> 4);
+                *(out++) = hexchar(*in);
+        }
+
+        *out = 0;
+
+        return escaped;
+}
+
+char *octescape(const char *s, size_t len) {
+        char *r, *t;
+        const char *f;
+
+        /* Escapes control characters, bytes >= 127, the backslash and the double quote in the first
+         * "len" bytes of s in \nnn style, i.e. as a backslash followed by exactly three octal digits.
+         * All other bytes are copied unchanged. Returns a newly allocated NUL-terminated string, or
+         * NULL on allocation failure. */
+
+        /* Every input byte becomes at most 4 output characters */
+        r = new(char, len * 4 + 1);
+        if (!r)
+                return NULL;
+
+        for (f = s, t = r; f < s + len; f++) {
+                /* Work on the unsigned value: where "char" is signed, bytes >= 0x80 are negative
+                 * and shifting them would not produce the intended digits. */
+                uint8_t u = (uint8_t) *f;
+
+                if (u < ' ' || u >= 127 || IN_SET(u, '\\', '"')) {
+                        *(t++) = '\\';
+                        /* Three bits per octal digit, hence the mask is 7 (not 8) */
+                        *(t++) = '0' + (u >> 6);
+                        *(t++) = '0' + ((u >> 3) & 7);
+                        *(t++) = '0' + (u & 7);
+                } else
+                        *(t++) = *f;
+        }
+
+        *t = 0;
+
+        return r;
+
+}
+
+static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) {
+        const char *in;
+
+        assert(bad);
+
+        /* Copies s to t, putting a backslash in front of every backslash and every character listed
+         * in "bad". If escape_tab_nl is set, newline and tab are written as \n and \t. Does not
+         * NUL-terminate; returns the position right after the last character written. */
+
+        for (in = s; *in != 0; in++) {
+                char c = *in;
+
+                if (escape_tab_nl && c == '\n') {
+                        *(t++) = '\\';
+                        *(t++) = 'n';
+                        continue;
+                }
+
+                if (escape_tab_nl && c == '\t') {
+                        *(t++) = '\\';
+                        *(t++) = 't';
+                        continue;
+                }
+
+                if (c == '\\' || strchr(bad, c))
+                        *(t++) = '\\';
+
+                *(t++) = c;
+        }
+
+        return t;
+}
+
+char *shell_escape(const char *s, const char *bad) {
+        char *escaped, *end;
+        size_t n;
+
+        /* Returns a newly allocated copy of s in which the backslash and all characters from "bad"
+         * are prefixed by a backslash. NULL on allocation failure. */
+
+        /* Worst case every character gets a backslash in front */
+        n = strlen(s);
+        escaped = new(char, n*2+1);
+        if (!escaped)
+                return NULL;
+
+        end = strcpy_backslash_escaped(escaped, s, bad, false);
+        *end = 0;
+
+        return escaped;
+}
+
+char* shell_maybe_quote(const char *s, EscapeStyle style) {
+        const char *first_bad;
+        char *quoted, *out;
+
+        assert(s);
+
+        /* Returns a newly allocated copy of s that is enclosed in quotes if that is needed to make
+         * it safe as a shell string, and a plain copy otherwise. Benign UTF-8 characters are
+         * treated as needing quoting too, which should be OK. */
+
+        /* Find the first character that cannot be used unquoted */
+        first_bad = s;
+        while (*first_bad &&
+               *first_bad > ' ' &&
+               *first_bad < 127 &&
+               !strchr(SHELL_NEED_QUOTES, *first_bad))
+                first_bad++;
+
+        if (!*first_bad)
+                return strdup(s);
+
+        /* Opening quote (two characters for the POSIX style), worst case two characters per
+         * input character, closing quote, NUL */
+        quoted = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1);
+        if (!quoted)
+                return NULL;
+
+        out = quoted;
+        switch (style) {
+
+        case ESCAPE_BACKSLASH:
+                *(out++) = '"';
+                break;
+
+        case ESCAPE_POSIX:
+                *(out++) = '$';
+                *(out++) = '\'';
+                break;
+
+        default:
+                assert_not_reached("Bad EscapeStyle");
+        }
+
+        /* Everything before the first problematic character can be copied as it is */
+        out = mempcpy(out, s, first_bad - s);
+
+        if (style == ESCAPE_BACKSLASH) {
+                out = strcpy_backslash_escaped(out, first_bad, SHELL_NEED_ESCAPE, false);
+                *(out++) = '"';
+        } else {
+                out = strcpy_backslash_escaped(out, first_bad, SHELL_NEED_ESCAPE_POSIX, true);
+                *(out++) = '\'';
+        }
+
+        *out = 0;
+
+        return quoted;
+}
diff --git a/src/basic/escape.h b/src/basic/escape.h
new file mode 100644
index 0000000..5156209
--- /dev/null
+++ b/src/basic/escape.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <uchar.h>
+
+#include "string-util.h"
+#include "missing_type.h"
+
+/* What characters are special in the shell? */
+/* must be escaped outside and inside double-quotes */
+#define SHELL_NEED_ESCAPE "\"\\`$"
+
+/* Those that can be escaped or double-quoted.
+ *
+ * Strictly speaking, ! does not need to be escaped, except in interactive
+ * mode, but let's be extra nice to the user and quote ! in case this
+ * output is ever used in interactive mode. */
+#define SHELL_NEED_QUOTES SHELL_NEED_ESCAPE GLOB_CHARS "'()<>|&;!"
+
+/* Note that we assume control characters would need to be escaped too in
+ * addition to the "special" characters listed here, if they appear in the
+ * string. Current users disallow control characters. Also '"' shall not
+ * be escaped.
+ */
+#define SHELL_NEED_ESCAPE_POSIX "\\\'"
+
+typedef enum UnescapeFlags { /* Flags for the cunescape() family of functions */
+ UNESCAPE_RELAX = 1, /* a trailing backslash or an invalid escape sequence is not an error; the backslash is copied verbatim */
+} UnescapeFlags;
+
+typedef enum EscapeStyle { /* Quoting style used by shell_maybe_quote() */
+ ESCAPE_BACKSLASH = 1, /* enclose in double quotes and backslash-escape the SHELL_NEED_ESCAPE characters */
+ ESCAPE_POSIX = 2, /* enclose in $'...', backslash-escape SHELL_NEED_ESCAPE_POSIX and write newline/tab as \n and \t */
+} EscapeStyle;
+
+char *cescape(const char *s);
+char *cescape_length(const char *s, size_t n);
+int cescape_char(char c, char *buf);
+
+int cunescape(const char *s, UnescapeFlags flags, char **ret);
+int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret);
+int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret);
+int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit);
+
+char *xescape(const char *s, const char *bad);
+char *octescape(const char *s, size_t len);
+
+char *shell_escape(const char *s, const char *bad);
+char* shell_maybe_quote(const char *s, EscapeStyle style);
diff --git a/src/basic/ether-addr-util.c b/src/basic/ether-addr-util.c
new file mode 100644
index 0000000..e875696
--- /dev/null
+++ b/src/basic/ether-addr-util.c
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <net/ethernet.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "ether-addr-util.h"
+#include "macro.h"
+#include "string-util.h"
+
+char* ether_addr_to_string(const struct ether_addr *addr, char buffer[ETHER_ADDR_TO_STRING_MAX]) {
+        const uint8_t *o;
+
+        assert(addr);
+        assert(buffer);
+
+        /* Formats the address into the caller's buffer and returns that buffer. In contrast to
+         * ether_ntoa() every octet is printed with %02x rather than %x, and no static buffer is
+         * involved. */
+
+        o = addr->ether_addr_octet;
+        sprintf(buffer, "%02x:%02x:%02x:%02x:%02x:%02x",
+                o[0], o[1], o[2], o[3], o[4], o[5]);
+
+        return buffer;
+}
+
+int ether_addr_compare(const struct ether_addr *a, const struct ether_addr *b) {
+        /* Byte-wise ordering over the ETH_ALEN address bytes */
+        const void *lhs = a, *rhs = b;
+
+        return memcmp(lhs, rhs, ETH_ALEN);
+}
+
+static void ether_addr_hash_func(const struct ether_addr *p, struct siphash *state) {
+        /* Feed the whole address structure into the hash state */
+        siphash24_compress(p, sizeof *p, state);
+}
+
+DEFINE_HASH_OPS(ether_addr_hash_ops, struct ether_addr, ether_addr_hash_func, ether_addr_compare);
+
+int ether_addr_from_string(const char *s, struct ether_addr *ret) { /*
+ * Parses a textual hardware address into *ret. Leading whitespace is
+ * skipped. The first character following the initial run of hex digits
+ * selects the format: '.' means three 16-bit groups, ':' or '-' means six
+ * octets. Every field needs at least one hex digit, and anything left
+ * over after the last field is an error.
+ *
+ * Returns 0 on success and -EINVAL on malformed input. *ret is only
+ * written after the whole string has been parsed successfully.
+ */
+ size_t pos = 0, n, field;
+ char sep = '\0';
+ const char *hex = HEXDIGITS, *hexoff;
+ size_t x;
+ bool touched;
+
+#define parse_fields(v) \
+ for (field = 0; field < ELEMENTSOF(v); field++) { \
+ touched = false; \
+ for (n = 0; n < (2 * sizeof(v[0])); n++) { \
+ if (s[pos] == '\0') \
+ break; \
+ hexoff = strchr(hex, s[pos]); \
+ if (!hexoff) \
+ break; \
+ assert(hexoff >= hex); \
+ x = hexoff - hex; \
+ if (x >= 16) \
+ x -= 6; /* A-F */ \
+ assert(x < 16); \
+ touched = true; \
+ v[field] <<= 4; \
+ v[field] += x; \
+ pos++; \
+ } \
+ if (!touched) \
+ return -EINVAL; \
+ if (field < (ELEMENTSOF(v)-1)) { \
+ if (s[pos] != sep) \
+ return -EINVAL; \
+ else \
+ pos++; \
+ } \
+ }
+
+ assert(s);
+ assert(ret);
+
+ s += strspn(s, WHITESPACE);
+ sep = s[strspn(s, hex)]; /* the character that ends the first run of hex digits decides the format */
+
+ if (sep == '.') {
+ uint16_t shorts[3] = { 0 };
+
+ parse_fields(shorts); /* may return -EINVAL from within the macro */
+
+ if (s[pos] != '\0')
+ return -EINVAL;
+
+ for (n = 0; n < ELEMENTSOF(shorts); n++) { /* each 16-bit group supplies two octets, high byte first */
+ ret->ether_addr_octet[2*n] = ((shorts[n] & (uint16_t)0xff00) >> 8);
+ ret->ether_addr_octet[2*n + 1] = (shorts[n] & (uint16_t)0x00ff);
+ }
+
+ } else if (IN_SET(sep, ':', '-')) {
+ struct ether_addr out = ETHER_ADDR_NULL;
+
+ parse_fields(out.ether_addr_octet); /* may return -EINVAL from within the macro */
+
+ if (s[pos] != '\0')
+ return -EINVAL;
+
+ for (n = 0; n < ELEMENTSOF(out.ether_addr_octet); n++)
+ ret->ether_addr_octet[n] = out.ether_addr_octet[n];
+
+ } else
+ return -EINVAL;
+
+ return 0;
+}
diff --git a/src/basic/ether-addr-util.h b/src/basic/ether-addr-util.h
new file mode 100644
index 0000000..4e44b30
--- /dev/null
+++ b/src/basic/ether-addr-util.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <net/ethernet.h>
+#include <stdbool.h>
+
+#include "hash-funcs.h"
+
+#define ETHER_ADDR_FORMAT_STR "%02X%02X%02X%02X%02X%02X"
+#define ETHER_ADDR_FORMAT_VAL(x) (x).ether_addr_octet[0], (x).ether_addr_octet[1], (x).ether_addr_octet[2], (x).ether_addr_octet[3], (x).ether_addr_octet[4], (x).ether_addr_octet[5]
+
+#define ETHER_ADDR_TO_STRING_MAX (3*6)
+char* ether_addr_to_string(const struct ether_addr *addr, char buffer[ETHER_ADDR_TO_STRING_MAX]);
+
+int ether_addr_compare(const struct ether_addr *a, const struct ether_addr *b);
+static inline bool ether_addr_equal(const struct ether_addr *a, const struct ether_addr *b) {
+        /* Two addresses are equal if they compare as neither smaller nor larger */
+        return !ether_addr_compare(a, b);
+}
+
+#define ETHER_ADDR_NULL ((const struct ether_addr){})
+
+static inline bool ether_addr_is_null(const struct ether_addr *addr) {
+        /* True if addr is identical to the all-zero address */
+        return ether_addr_compare(addr, &ETHER_ADDR_NULL) == 0;
+}
+
+int ether_addr_from_string(const char *s, struct ether_addr *ret);
+
+extern const struct hash_ops ether_addr_hash_ops;
diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c
new file mode 100644
index 0000000..a861b56
--- /dev/null
+++ b/src/basic/extract-word.c
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "utf8.h"
+
+int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
+ _cleanup_free_ char *s = NULL;
+ size_t allocated = 0, sz = 0;
+ char c;
+ int r;
+
+ char quote = 0; /* 0 or ' or " */
+ bool backslash = false; /* whether we've just seen a backslash */
+
+ assert(p);
+ assert(ret);
+
+ /* Bail early if called after last value or with no input */
+ if (!*p)
+ goto finish;
+ c = **p;
+
+ if (!separators)
+ separators = WHITESPACE;
+
+ /* Parses the first word of a string, and returns it in
+ * *ret. Removes all quotes in the process. When parsing fails
+ * (because of an uneven number of quotes or similar), leaves
+ * the pointer *p at the first invalid character.
+ *
+ * Return values: 1 if a word was extracted (*ret then holds a
+ * newly allocated string), 0 if there was nothing left to
+ * extract (*ret and *p are both set to NULL), -ENOMEM on
+ * allocation failure and -EINVAL on invalid input. On success
+ * *p is advanced past the word, or set to NULL once the end
+ * of the input has been reached. A NULL separators argument
+ * selects WHITESPACE. */
+
+ if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
+ if (!GREEDY_REALLOC(s, allocated, sz+1))
+ return -ENOMEM;
+
+ for (;; (*p)++, c = **p) { /* phase 1: skip the separators in front of the word */
+ if (c == 0)
+ goto finish_force_terminate;
+ else if (strchr(separators, c)) {
+ if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
+ (*p)++;
+ goto finish_force_next;
+ }
+ } else {
+ /* We found a non-blank character, so we will always
+ * want to return a string (even if it is empty),
+ * allocate it here. */
+ if (!GREEDY_REALLOC(s, allocated, sz+1))
+ return -ENOMEM;
+ break;
+ }
+ }
+
+ for (;; (*p)++, c = **p) { /* phase 2: consume the word; each pass handles one of the three states below */
+ if (backslash) {
+ if (!GREEDY_REALLOC(s, allocated, sz+7))
+ return -ENOMEM;
+
+ if (c == 0) {
+ if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
+ (!quote || flags & EXTRACT_RELAX)) {
+ /* If we find an unquoted trailing backslash and we're in
+ * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
+ * output.
+ *
+ * Unbalanced quotes will only be allowed in EXTRACT_RELAX
+ * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
+ */
+ s[sz++] = '\\';
+ goto finish_force_terminate;
+ }
+ if (flags & EXTRACT_RELAX)
+ goto finish_force_terminate;
+ return -EINVAL;
+ }
+
+ if (flags & EXTRACT_CUNESCAPE) {
+ bool eight_bit = false;
+ char32_t u;
+
+ r = cunescape_one(*p, (size_t) -1, &u, &eight_bit);
+ if (r < 0) {
+ if (flags & EXTRACT_CUNESCAPE_RELAX) {
+ s[sz++] = '\\';
+ s[sz++] = c;
+ } else
+ return -EINVAL;
+ } else {
+ (*p) += r - 1; /* the loop header advances *p by one more */
+
+ if (eight_bit)
+ s[sz++] = u;
+ else
+ sz += utf8_encode_unichar(s + sz, u);
+ }
+ } else
+ s[sz++] = c;
+
+ backslash = false;
+
+ } else if (quote) { /* inside either single or double quotes */
+ for (;; (*p)++, c = **p) {
+ if (c == 0) {
+ if (flags & EXTRACT_RELAX)
+ goto finish_force_terminate;
+ return -EINVAL;
+ } else if (c == quote) { /* found the end quote */
+ quote = 0;
+ break;
+ } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
+ backslash = true;
+ break;
+ } else {
+ if (!GREEDY_REALLOC(s, allocated, sz+2))
+ return -ENOMEM;
+
+ s[sz++] = c;
+ }
+ }
+
+ } else { /* outside of any quotes */
+ for (;; (*p)++, c = **p) {
+ if (c == 0)
+ goto finish_force_terminate;
+ else if (IN_SET(c, '\'', '"') && (flags & EXTRACT_QUOTES)) {
+ quote = c;
+ break;
+ } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
+ backslash = true;
+ break;
+ } else if (strchr(separators, c)) {
+ if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
+ (*p)++;
+ goto finish_force_next;
+ }
+ /* Skip additional coalesced separators. */
+ for (;; (*p)++, c = **p) {
+ if (c == 0)
+ goto finish_force_terminate;
+ if (!strchr(separators, c))
+ break;
+ }
+ goto finish;
+
+ } else {
+ if (!GREEDY_REALLOC(s, allocated, sz+2))
+ return -ENOMEM;
+
+ s[sz++] = c;
+ }
+ }
+ }
+ }
+
+finish_force_terminate: /* end of input reached: a later call sees *p == NULL and returns 0 */
+ *p = NULL;
+finish:
+ if (!s) { /* nothing was allocated, i.e. there was no word at all */
+ *p = NULL;
+ *ret = NULL;
+ return 0;
+ }
+
+finish_force_next: /* also entered directly when a separator ends a word in EXTRACT_DONT_COALESCE_SEPARATORS mode */
+ s[sz] = 0;
+ *ret = TAKE_PTR(s);
+
+ return 1;
+}
+
+int extract_first_word_and_warn(
+                const char **p,
+                char **ret,
+                const char *separators,
+                ExtractFlags flags,
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *rvalue) {
+
+        /* Like extract_first_word(), but logs about failures. If unquoting fails with -EINVAL, a
+         * second attempt is made with EXTRACT_CUNESCAPE_RELAX, so that invalid escape sequences
+         * keep their backslashes verbatim and only cause a warning. */
+
+        const char *start;
+        int r;
+
+        start = *p;
+
+        r = extract_first_word(p, ret, separators, flags);
+        if (r >= 0)
+                return r;
+
+        /* A second attempt only makes sense for -EINVAL, and only if the relaxed mode wasn't on already */
+        if (r != -EINVAL || (flags & EXTRACT_CUNESCAPE_RELAX))
+                goto fail;
+
+        *p = start;
+        r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
+        if (r >= 0) {
+                /* The relaxed mode succeeded, so an invalid escape sequence was the culprit. */
+                log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret);
+                return r;
+        }
+
+        /* -EINVAL even in relaxed mode can only mean unbalanced quoting. */
+        if (r == -EINVAL)
+                return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
+
+fail:
+        /* Some other error, report it as it is */
+        return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
+}
+
+/* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing
+ * an object that undergoes default argument promotion as an argument to va_start).
+ * Let's make sure that ExtractFlags fits into an unsigned int. */
+assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned));
+
+int extract_many_words(const char **p, const char *separators, unsigned flags, ...) {
+        int n_expected = 0, n_found, idx, r;
+        char **words;
+        va_list ap;
+
+        /* Extracts several words from a string in one go, unquoting where necessary. The variable
+         * arguments are a NULL-terminated list of "char **" output locations. Returns the number of
+         * words found, or a negative value on failure. Locations for which no word was found are
+         * set to NULL. */
+
+        assert(p);
+
+        /* First pass over the arguments: how many output locations were we given? */
+        va_start(ap, flags);
+        while (va_arg(ap, char **))
+                n_expected++;
+        va_end(ap);
+
+        if (n_expected <= 0)
+                return 0;
+
+        /* Collect the words in a zero-initialized scratch array first, so that the output
+         * locations stay untouched if parsing fails half-way */
+        words = newa0(char*, n_expected);
+        for (n_found = 0; n_found < n_expected; n_found++) {
+
+                r = extract_first_word(p, &words[n_found], separators, flags);
+                if (r < 0) {
+                        for (idx = 0; idx < n_found; idx++)
+                                free(words[idx]);
+
+                        return r;
+                }
+
+                if (r == 0)
+                        break;
+        }
+
+        /* Second pass over the arguments: hand out the results */
+        va_start(ap, flags);
+        for (idx = 0; idx < n_expected; idx++) {
+                char **location;
+
+                location = va_arg(ap, char **);
+                assert(location);
+
+                *location = words[idx];
+        }
+        va_end(ap);
+
+        return n_found;
+}
diff --git a/src/basic/extract-word.h b/src/basic/extract-word.h
new file mode 100644
index 0000000..705ebbe
--- /dev/null
+++ b/src/basic/extract-word.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "macro.h"
+
+typedef enum ExtractFlags { /* Flags for extract_first_word() and the functions built on top of it */
+ EXTRACT_RELAX = 1 << 0, /* an unterminated quote or trailing backslash at the end of the input ends the word instead of failing with -EINVAL */
+ EXTRACT_CUNESCAPE = 1 << 1, /* decode backslash escape sequences with cunescape_one() */
+ EXTRACT_CUNESCAPE_RELAX = 1 << 2, /* keep invalid escape sequences (and an unquoted trailing backslash) verbatim instead of failing */
+ EXTRACT_QUOTES = 1 << 3, /* treat ' and " as quote characters and strip them from the word */
+ EXTRACT_DONT_COALESCE_SEPARATORS = 1 << 4, /* every single separator ends a word, so empty words may be returned */
+ EXTRACT_RETAIN_ESCAPE = 1 << 5, /* do not treat the backslash as an escape character, copy it like any other character */
+} ExtractFlags;
+
+int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags);
+int extract_first_word_and_warn(const char **p, char **ret, const char *separators, ExtractFlags flags, const char *unit, const char *filename, unsigned line, const char *rvalue);
+int extract_many_words(const char **p, const char *separators, unsigned flags, ...) _sentinel_;
diff --git a/src/basic/fd-util.c b/src/basic/fd-util.c
new file mode 100644
index 0000000..3e6ef5a
--- /dev/null
+++ b/src/basic/fd-util.c
@@ -0,0 +1,967 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "memfd-util.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "util.h"
+#include "tmpfile-util.h"
+
+int close_nointr(int fd) {
+        assert(fd >= 0);
+
+        /*
+         * close() that reports EINTR as success. Retrying in a loop would be the wrong thing to
+         * do on Linux, see:
+         *
+         * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
+         * https://bugzilla.gnome.org/show_bug.cgi?id=682819
+         * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
+         * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
+         */
+        if (close(fd) < 0 && errno != EINTR)
+                return -errno;
+
+        return 0;
+}
+
+int safe_close(int fd) {
+
+        /*
+         * A close_nointr() that cannot fail and leaves errno as it found it. Negative fds are
+         * ignored. Always returns -1, which allows for this idiom:
+         *
+         * fd = safe_close(fd);
+         */
+
+        if (fd < 0)
+                return -1;
+
+        PROTECT_ERRNO;
+
+        /* close() may return almost any error code, yet the fd is gone afterwards in any case.
+         * The one thing worth catching here is an fd that was not valid in the first place. */
+        assert_se(close_nointr(fd) != -EBADF);
+
+        return -1;
+}
+
+void safe_close_pair(int p[static 2]) {
+        assert(p);
+
+        /* Closes both fds of a pair and marks both entries as invalid */
+
+        if (p[0] != p[1]) {
+                p[0] = safe_close(p[0]);
+                p[1] = safe_close(p[1]);
+                return;
+        }
+
+        /* Both directions share a single fd, which must be closed only once */
+        p[0] = p[1] = safe_close(p[0]);
+}
+
+void close_many(const int fds[], size_t n_fd) {
+        size_t k = 0;
+
+        assert(fds || n_fd <= 0);
+
+        /* safe_close() on each of the first n_fd entries of the array */
+        while (k < n_fd)
+                safe_close(fds[k++]);
+}
+
+int fclose_nointr(FILE *f) {
+        assert(f);
+
+        /* The fclose() counterpart of close_nointr(): EINTR counts as success */
+
+        if (fclose(f) != 0 && errno != EINTR)
+                return -errno;
+
+        return 0;
+}
+
+FILE* safe_fclose(FILE *f) {
+
+        /* The fclose() counterpart of safe_close(): NULL is ignored, errno is left alone and
+         * the return value is always NULL */
+
+        if (!f)
+                return NULL;
+
+        PROTECT_ERRNO;
+
+        assert_se(fclose_nointr(f) != -EBADF);
+
+        return NULL;
+}
+
+DIR* safe_closedir(DIR *d) {
+
+        /* closedir() that ignores NULL, leaves errno alone and always returns NULL */
+
+        if (!d)
+                return NULL;
+
+        PROTECT_ERRNO;
+
+        assert_se(closedir(d) >= 0 || errno != EBADF);
+
+        return NULL;
+}
+
+int fd_nonblock(int fd, bool nonblock) {
+        int current, wanted;
+
+        assert(fd >= 0);
+
+        /* Turns O_NONBLOCK on or off for the fd. Returns 0 on success, negative errno on failure. */
+
+        current = fcntl(fd, F_GETFL, 0);
+        if (current < 0)
+                return -errno;
+
+        wanted = nonblock ? (current | O_NONBLOCK) : (current & ~O_NONBLOCK);
+
+        /* Only issue the second call if something actually changes */
+        if (wanted != current && fcntl(fd, F_SETFL, wanted) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int fd_cloexec(int fd, bool cloexec) {
+        int current, wanted;
+
+        assert(fd >= 0);
+
+        /* Turns FD_CLOEXEC on or off for the fd. Returns 0 on success, negative errno on failure. */
+
+        current = fcntl(fd, F_GETFD, 0);
+        if (current < 0)
+                return -errno;
+
+        wanted = cloexec ? (current | FD_CLOEXEC) : (current & ~FD_CLOEXEC);
+
+        /* Only issue the second call if something actually changes */
+        if (wanted != current && fcntl(fd, F_SETFD, wanted) < 0)
+                return -errno;
+
+        return 0;
+}
+
+_pure_ static bool fd_in_set(int fd, const int fdset[], size_t n_fdset) {
+        size_t k = 0;
+
+        assert(n_fdset == 0 || fdset);
+
+        /* Linear search for fd among the first n_fdset entries of fdset */
+        while (k < n_fdset) {
+                if (fdset[k] == fd)
+                        return true;
+
+                k++;
+        }
+
+        return false;
+}
+
+static int get_max_fd(void) {
+        struct rlimit limits;
+        rlim_t highest;
+
+        /* Returns the highest possible fd according to RLIMIT_NOFILE, but never less than
+         * FD_SETSIZE-1 and never more than INT_MAX. Negative errno if the limit cannot be read. */
+
+        if (getrlimit(RLIMIT_NOFILE, &limits) < 0)
+                return -errno;
+
+        highest = MAX(limits.rlim_cur, limits.rlim_max);
+
+        /* Always cover at least the first 1024 fds */
+        if (highest < FD_SETSIZE)
+                return FD_SETSIZE-1;
+
+        if (highest != RLIM_INFINITY && highest <= INT_MAX)
+                return (int) (highest - 1);
+
+        /* Saturate: fds are of type "int", hence can never exceed INT_MAX */
+        return INT_MAX;
+}
+
+int close_all_fds(const int except[], size_t n_except) {
+        _cleanup_closedir_ DIR *dir = NULL;
+        struct dirent *entry;
+        int ret = 0, fd, max_fd;
+
+        /* Closes every fd >= 3 that is not listed in except[]. Returns 0 on success, otherwise the
+         * first error seen while closing (-EBADF is never counted as an error), or the error that
+         * kept us from enumerating the fds. */
+
+        assert(n_except == 0 || except);
+
+        dir = opendir("/proc/self/fd");
+        if (dir) {
+                FOREACH_DIRENT(entry, dir, return -errno) {
+                        int candidate = -1, q;
+
+                        /* Skip, in this order: names that are not numbers (better ignore these,
+                         * just in case), stdin/stdout/stderr, the fd we are iterating with, and
+                         * everything the caller wants to keep */
+                        if (safe_atoi(entry->d_name, &candidate) < 0 ||
+                            candidate < 3 ||
+                            candidate == dirfd(dir) ||
+                            fd_in_set(candidate, except, n_except))
+                                continue;
+
+                        q = close_nointr(candidate);
+                        if (q < 0 && q != -EBADF && ret >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
+                                ret = q;
+                }
+
+                return ret;
+        }
+
+        /* Without /proc (for example in chroots) all that is left is to brute force through the
+         * fd table */
+
+        max_fd = get_max_fd();
+        if (max_fd < 0)
+                return max_fd;
+
+        fd = 3;
+        for (;;) {
+                if (!fd_in_set(fd, except, n_except)) {
+                        int q;
+
+                        q = close_nointr(fd);
+                        if (q < 0 && q != -EBADF && ret >= 0)
+                                ret = q;
+                }
+
+                /* Stop before incrementing, so that fd cannot overflow if max_fd is INT_MAX */
+                if (fd >= max_fd)
+                        break;
+
+                fd++;
+        }
+
+        return ret;
+}
+
+int same_fd(int a, int b) {
+        struct stat st_a, st_b;
+        int r, flags_a, flags_b;
+        pid_t self;
+
+        assert(a >= 0);
+        assert(b >= 0);
+
+        /* Checks whether two file descriptors refer to the same thing. Returns > 0 if so, 0 if
+         * not, negative errno on failure.
+         *
+         * The semantics differ depending on whether kcmp() is usable. With kcmp() only dup()ed
+         * file descriptors are considered the same. Without it, two fds of the same file obtained
+         * by separate open() calls are considered the same as well. The call is mostly used to
+         * filter duplicates out of the fd store, where this difference hopefully doesn't matter
+         * too much. */
+
+        if (a == b)
+                return true;
+
+        /* Preferably let the kernel decide */
+        self = getpid_cached();
+        r = kcmp(self, self, KCMP_FILE, a, b);
+        if (r >= 0)
+                return r == 0;
+        if (!IN_SET(errno, ENOSYS, EACCES, EPERM))
+                return -errno;
+
+        /* kcmp() is not available, compare what fstat() tells us instead */
+        if (fstat(a, &st_a) < 0 || fstat(b, &st_b) < 0)
+                return -errno;
+
+        if ((st_a.st_mode & S_IFMT) != (st_b.st_mode & S_IFMT))
+                return false;
+
+        /* Device fds are always treated as different: even with the same inode and backing dev_t
+         * two of them might refer to quite different device contexts. */
+        if (S_ISCHR(st_a.st_mode) || S_ISBLK(st_a.st_mode))
+                return false;
+
+        if (st_a.st_dev != st_b.st_dev || st_a.st_ino != st_b.st_ino)
+                return false;
+
+        /* Same inode. Compare the file status flags too, which tells the read side and the write
+         * side of a pipe created with pipe() apart. */
+        flags_a = fcntl(a, F_GETFL);
+        if (flags_a < 0)
+                return -errno;
+
+        flags_b = fcntl(b, F_GETFL);
+        if (flags_b < 0)
+                return -errno;
+
+        return flags_a == flags_b;
+}
+
+void cmsg_close_all(struct msghdr *mh) {
+        struct cmsghdr *c;
+
+        /* Closes every file descriptor carried in SOL_SOCKET/SCM_RIGHTS control messages of the specified
+         * message header. */
+
+        assert(mh);
+
+        CMSG_FOREACH(c, mh) {
+                size_t n_fds;
+
+                if (c->cmsg_level != SOL_SOCKET || c->cmsg_type != SCM_RIGHTS)
+                        continue;
+
+                /* The payload is an array of ints, its size is the message length minus the header */
+                n_fds = (c->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+                close_many((int*) CMSG_DATA(c), n_fds);
+        }
+}
+
+bool fdname_is_valid(const char *s) {
+        size_t n = 0;
+
+        /* Checks whether the string is suitable as a name in $LISTEN_FDNAMES. Accepted is any ASCII
+         * character that is not a control character, with the exception of ":", which serves as field
+         * separator in $LISTEN_FDNAMES.
+         *
+         * The empty string is explicitly valid. Names may be at most 255 characters long. NULL is not
+         * valid. */
+
+        if (!s)
+                return false;
+
+        while (s[n] != 0) {
+                char c = s[n];
+
+                if (c < ' ' || c >= 127 || c == ':')
+                        return false;
+
+                n++;
+        }
+
+        return n <= 255;
+}
+
+int fd_get_path(int fd, char **ret) {
+        char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+        int r;
+
+        /* Reads the /proc/self/fd/<fd> symlink of the specified fd via readlink_malloc(), which stores
+         * the result in *ret. Returns -EBADF if the fd does not exist, -EOPNOTSUPP if /proc/self/fd/ is
+         * missing, and otherwise whatever readlink_malloc() returned. */
+
+        xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+
+        r = readlink_malloc(procfs_path, ret);
+        if (r != -ENOENT)
+                return r;
+
+        /* ENOENT is ambiguous: either the fd does not exist, or /proc is not mounted. To keep things
+         * debuggable, distinguish the two cases. */
+        if (access("/proc/self/fd/", F_OK) >= 0)
+                return -EBADF; /* The directory is there, hence it must be the fd that is missing. */
+
+        /* /proc is not available or not set up properly, most likely we run in some chroot environment. */
+        return errno == ENOENT ? -EOPNOTSUPP : -errno;
+}
+
+int move_fd(int from, int to, int cloexec) {
+        int r;
+
+        /* Moves file descriptor 'from' to position 'to' and releases the old fd. 'cloexec' controls
+         * FD_CLOEXEC of the result: -1 keeps the setting of the original fd, 0 turns it off, > 0 turns it
+         * on. Returns 'to' on success, negative errno on failure. */
+
+        if (from < 0 || to < 0)
+                return -EBADF;
+
+        if (from == to) {
+                /* Nothing to move, at most the flag needs adjusting */
+                if (cloexec < 0)
+                        return to;
+
+                r = fd_cloexec(to, cloexec);
+                return r < 0 ? r : to;
+        }
+
+        if (cloexec < 0) {
+                int fd_flags;
+
+                /* Inherit the flag from the source fd */
+                fd_flags = fcntl(from, F_GETFD, 0);
+                if (fd_flags < 0)
+                        return -errno;
+
+                cloexec = (fd_flags & FD_CLOEXEC) != 0;
+        }
+
+        r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
+        if (r < 0)
+                return -errno;
+
+        assert(r == to);
+
+        safe_close(from);
+
+        return to;
+}
+
+int acquire_data_fd(const void *data, size_t size, unsigned flags) {
+
+ _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
+ char pattern[] = "/dev/shm/data-fd-XXXXXX";
+ _cleanup_close_ int fd = -1;
+ int isz = 0, r;
+ ssize_t n;
+ off_t f;
+
+ assert(data || size == 0);
+
+ /* Acquire a read-only file descriptor that when read from returns the specified data. Returns the new
+ * fd (>= 0) on success and a negative errno-style error on failure; -EOPNOTSUPP is returned if none of
+ * the backends permitted by 'flags' (ACQUIRE_NO_xyz) could be used. This is much more
+ * complex than I wish it was. But here's why:
+ *
+ * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
+ * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
+ *
+ * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
+ * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
+ * clients can only bump their size to a system-wide limit, which might be quite low.
+ *
+ * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
+ * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
+ * /proc/self/fd/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
+ *
+ * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
+ *
+ * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
+ * figure. */
+
+ if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
+ /* As a special case, return /dev/null if we have been called for an empty data block */
+ r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (r < 0)
+ return -errno;
+
+ return r;
+ }
+
+ if ((flags & ACQUIRE_NO_MEMFD) == 0) {
+ fd = memfd_new("data-fd");
+ if (fd < 0)
+ goto try_pipe;
+
+ n = write(fd, data, size);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n != size)
+ return -EIO;
+
+ f = lseek(fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ r = memfd_set_sealed(fd);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(fd);
+ }
+
+try_pipe:
+ if ((flags & ACQUIRE_NO_PIPE) == 0) {
+ if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
+ return -errno;
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+
+ if ((size_t) isz < size) {
+ isz = (int) size;
+ if (isz < 0 || (size_t) isz != size) /* size doesn't fit into an int, hence can't be requested via F_SETPIPE_SZ */
+ return -E2BIG;
+
+ /* Try to bump the pipe size */
+ (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
+
+ /* See if that worked */
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+
+ if ((size_t) isz < size)
+ goto try_dev_shm;
+ }
+
+ n = write(pipefds[1], data, size);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n != size)
+ return -EIO;
+
+ (void) fd_nonblock(pipefds[0], false);
+
+ return TAKE_FD(pipefds[0]); /* the write end stays in pipefds[] and is released by _cleanup_close_pair_ */
+ }
+
+try_dev_shm:
+ if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
+ fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
+ if (fd < 0)
+ goto try_dev_shm_without_o_tmpfile;
+
+ n = write(fd, data, size);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n != size)
+ return -EIO;
+
+ /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
+ return fd_reopen(fd, O_RDONLY|O_CLOEXEC);
+ }
+
+try_dev_shm_without_o_tmpfile:
+ if ((flags & ACQUIRE_NO_REGULAR) == 0) {
+ fd = mkostemp_safe(pattern);
+ if (fd < 0)
+ return fd;
+
+ n = write(fd, data, size);
+ if (n < 0) {
+ r = -errno;
+ goto unlink_and_return;
+ }
+ if ((size_t) n != size) {
+ r = -EIO;
+ goto unlink_and_return;
+ }
+
+ /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
+ r = open(pattern, O_RDONLY|O_CLOEXEC);
+ if (r < 0)
+ r = -errno;
+
+ unlink_and_return:
+ (void) unlink(pattern); /* executed on success and failure alike: the file name is always removed again */
+ return r;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+/* When the data is smaller or equal to 64K, try to place the copy in a memfd/pipe */
+#define DATA_FD_MEMORY_LIMIT (64U*1024U)
+
+/* If memfd/pipe didn't work out, then let's use a file in /tmp up to a size of 1M. If it's larger than that use /var/tmp instead. */
+#define DATA_FD_TMP_LIMIT (1024U*1024U)
+
+int fd_duplicate_data_fd(int fd) {
+
+ _cleanup_close_ int copy_fd = -1, tmp_fd = -1;
+ _cleanup_free_ void *remains = NULL;
+ size_t remains_size = 0;
+ const char *td;
+ struct stat st;
+ int r;
+
+ /* Creates a 'data' fd from the specified source fd, containing all the same data in a read-only fashion, but
+ * independent of it (i.e. the source fd can be closed and unmounted after this call succeeded). Tries to be
+ * somewhat smart about where to place the data. In the best case uses a memfd(). If memfd() are not supported
+ * uses a pipe instead. For larger data will use an unlinked file in /tmp, and for even larger data one in
+ * /var/tmp.
+ *
+ * Returns the new fd (>= 0) on success, and a negative errno-style error on failure. */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* For now, let's only accept regular files, sockets, pipes and char devices */
+ if (S_ISDIR(st.st_mode))
+ return -EISDIR;
+ if (S_ISLNK(st.st_mode))
+ return -ELOOP;
+ if (!S_ISREG(st.st_mode) && !S_ISSOCK(st.st_mode) && !S_ISFIFO(st.st_mode) && !S_ISCHR(st.st_mode))
+ return -EBADFD;
+
+ /* If we have reason to believe the data is bounded in size, then let's use memfds or pipes as backing fd. Note
+ * that we use the reported regular file size only as a hint, given that there are plenty special files in
+ * /proc and /sys which report a zero file size but can be read from. */
+
+ if (!S_ISREG(st.st_mode) || st.st_size < DATA_FD_MEMORY_LIMIT) {
+
+ /* Try a memfd first */
+ copy_fd = memfd_new("data-fd");
+ if (copy_fd >= 0) {
+ off_t f;
+
+ r = copy_bytes(fd, copy_fd, DATA_FD_MEMORY_LIMIT, 0);
+ if (r < 0)
+ return r;
+
+ f = lseek(copy_fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ if (r == 0) {
+ /* Did it fit into the limit? If so, we are done. */
+ r = memfd_set_sealed(copy_fd);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(copy_fd);
+ }
+
+ /* Hmm, pity, this didn't fit. Let's fall back to /tmp then, see below */
+
+ } else {
+ _cleanup_(close_pairp) int pipefds[2] = { -1, -1 };
+ int isz;
+
+ /* If memfds aren't available, use a pipe. Set O_NONBLOCK so that we will get EAGAIN rather
+ * than block indefinitely when we hit the pipe size limit */
+
+ if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
+ return -errno;
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+
+ /* Try to enlarge the pipe size if necessary */
+ if ((size_t) isz < DATA_FD_MEMORY_LIMIT) {
+
+ (void) fcntl(pipefds[1], F_SETPIPE_SZ, DATA_FD_MEMORY_LIMIT);
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+ }
+
+ if ((size_t) isz >= DATA_FD_MEMORY_LIMIT) {
+
+ r = copy_bytes_full(fd, pipefds[1], DATA_FD_MEMORY_LIMIT, 0, &remains, &remains_size, NULL, NULL);
+ if (r < 0 && r != -EAGAIN)
+ return r; /* If we get EAGAIN it could be because of the source or because of
+ * the destination fd, we can't know, as sendfile() and friends won't
+ * tell us. Hence, treat this as reason to fall back, just to be
+ * sure. */
+ if (r == 0) {
+ /* Everything fit in, yay! */
+ (void) fd_nonblock(pipefds[0], false);
+
+ return TAKE_FD(pipefds[0]);
+ }
+
+ /* Things didn't fit in. But we read data into the pipe, let's remember that, so that
+ * when writing the new file we incorporate this first. */
+ copy_fd = TAKE_FD(pipefds[0]);
+ }
+ }
+ }
+
+ /* If we have reason to believe this will fit fine in /tmp, then use that as first fallback. */
+ if ((!S_ISREG(st.st_mode) || st.st_size < DATA_FD_TMP_LIMIT) &&
+ (DATA_FD_MEMORY_LIMIT + remains_size) < DATA_FD_TMP_LIMIT) {
+ off_t f;
+
+ tmp_fd = open_tmpfile_unlinkable(NULL /* NULL as directory means /tmp */, O_RDWR|O_CLOEXEC);
+ if (tmp_fd < 0)
+ return tmp_fd;
+
+ if (copy_fd >= 0) {
+ /* If we tried a memfd/pipe first and it ended up being too large, then copy this into the
+ * temporary file first. */
+
+ r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, 0);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ }
+
+ if (remains_size > 0) {
+ /* If there were remaining bytes (i.e. read into memory, but not written out yet) from the
+ * failed copy operation, let's flush them out next. */
+
+ r = loop_write(tmp_fd, remains, remains_size, false);
+ if (r < 0)
+ return r;
+ }
+
+ r = copy_bytes(fd, tmp_fd, DATA_FD_TMP_LIMIT - DATA_FD_MEMORY_LIMIT - remains_size, COPY_REFLINK);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ goto finish; /* Yay, it fit in */
+
+ /* It didn't fit in. Let's not forget to use what we already used */
+ f = lseek(tmp_fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ safe_close(copy_fd);
+ copy_fd = TAKE_FD(tmp_fd); /* the /tmp file now takes the role of the "data read so far" fd */
+
+ remains = mfree(remains);
+ remains_size = 0;
+ }
+
+ /* As last fallback use /var/tmp */
+ r = var_tmp_dir(&td);
+ if (r < 0)
+ return r;
+
+ tmp_fd = open_tmpfile_unlinkable(td, O_RDWR|O_CLOEXEC);
+ if (tmp_fd < 0)
+ return tmp_fd;
+
+ if (copy_fd >= 0) {
+ /* If we tried a memfd/pipe first, or a file in /tmp, and it ended up being too large, then copy this
+ * into the temporary file first. */
+ r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ }
+
+ if (remains_size > 0) {
+ /* Then, copy in any read but not yet written bytes. */
+ r = loop_write(tmp_fd, remains, remains_size, false);
+ if (r < 0)
+ return r;
+ }
+
+ /* Copy in the rest */
+ r = copy_bytes(fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+
+finish:
+ /* Now convert the O_RDWR file descriptor into an O_RDONLY one (and as side effect seek to the beginning of the
+ * file again) */
+
+ return fd_reopen(tmp_fd, O_RDONLY|O_CLOEXEC);
+}
+
+int fd_move_above_stdio(int fd) {
+        int fd_flags, moved;
+        PROTECT_ERRNO;
+
+        /* If the specified file descriptor lies in the range [0…2], i.e. the range of
+         * stdin/stdout/stderr, duplicates it to an fd >= 3, closes the original and returns the duplicate.
+         * Any other fd is returned as is. Intended for long-lasting file descriptors we allocate in our
+         * code that might get loaded into foreign code, where we want to make it unlikely that our fds
+         * are accidentally used as stdin/stdout/stderr of unrelated code.
+         *
+         * This fixes no real bugs, it only lowers the chance that we are affected by buggy code of others
+         * that mindlessly invokes 'fprintf(stderr, …' or similar after stderr has been closed.
+         *
+         * The function works in "best-effort" and "least-impact" style: on any error the original file
+         * descriptor is returned, and errno is left untouched. */
+
+        if (fd < 0 || fd > 2)
+                return fd;
+
+        fd_flags = fcntl(fd, F_GETFD, 0);
+        if (fd_flags < 0)
+                return fd;
+
+        /* Carry the close-on-exec setting over to the duplicate */
+        moved = fcntl(fd, (fd_flags & FD_CLOEXEC) ? F_DUPFD_CLOEXEC : F_DUPFD, 3);
+        if (moved < 0)
+                return fd;
+
+        assert(moved > 2);
+
+        (void) close(fd);
+        return moved;
+}
+
+int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) {
+
+ int fd[3] = { /* Put together an array of fds we work on */
+ original_input_fd,
+ original_output_fd,
+ original_error_fd
+ };
+
+ int r, i,
+ null_fd = -1, /* if we open /dev/null, we store the fd to it here */
+ copy_fd[3] = { -1, -1, -1 }; /* This contains all fds we duplicate here temporarily, and hence need to close at the end */
+ bool null_readable, null_writable;
+
+ /* Sets up stdin, stdout, stderr with the three file descriptors passed in. If any of the descriptors is
+ * specified as -1 it will be connected with /dev/null instead. If any of the file descriptors is passed as
+ * itself (e.g. stdin as STDIN_FILENO) it is left unmodified, but the O_CLOEXEC bit is turned off should it be
+ * on.
+ *
+ * Note that if any of the passed file descriptors are > 2 they will be closed — both on success and on
+ * failure! Thus, callers should assume that when this function returns the input fds are invalidated.
+ *
+ * Note that when this function fails stdin/stdout/stderr might remain half set up!
+ *
+ * O_CLOEXEC is turned off for all three file descriptors (which is how it should be for
+ * stdin/stdout/stderr).
+ *
+ * Returns 0 on success, and a negative errno-style error on failure. */
+
+ null_readable = original_input_fd < 0;
+ null_writable = original_output_fd < 0 || original_error_fd < 0;
+
+ /* First step, open /dev/null once, if we need it */
+ if (null_readable || null_writable) {
+
+ /* Let's open this with O_CLOEXEC first, and convert it to non-O_CLOEXEC when we move the fd to the final position. */
+ null_fd = open("/dev/null", (null_readable && null_writable ? O_RDWR :
+ null_readable ? O_RDONLY : O_WRONLY) | O_CLOEXEC);
+ if (null_fd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ /* If this fd is in the 0…2 range, let's move it out of it */
+ if (null_fd < 3) {
+ int copy;
+
+ copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
+ if (copy < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ safe_close(null_fd);
+ null_fd = copy;
+ }
+ }
+
+ /* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */
+ for (i = 0; i < 3; i++) {
+
+ if (fd[i] < 0)
+ fd[i] = null_fd; /* A negative parameter means: connect this one to /dev/null */
+ else if (fd[i] != i && fd[i] < 3) {
+ /* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so that we can work there. */
+ copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
+ if (copy_fd[i] < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ fd[i] = copy_fd[i];
+ }
+ }
+
+ /* At this point fd[] holds the fds to install: each entry either already sits at its intended place
+ * (fd[i] == i), or lies above the stdio range, so that we have freedom to move it around. Let's now
+ * move them to the right places. This is the point of no return. */
+ for (i = 0; i < 3; i++) {
+
+ if (fd[i] == i) {
+
+ /* fd is already in place, but let's make sure O_CLOEXEC is off */
+ r = fd_cloexec(i, false);
+ if (r < 0)
+ goto finish;
+
+ } else {
+ assert(fd[i] > 2);
+
+ if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */
+ r = -errno;
+ goto finish;
+ }
+ }
+ }
+
+ r = 0;
+
+finish:
+ /* Close the original fds, but only if they were outside of the stdio range. Also, properly check for the same
+ * fd passed in multiple times. */
+ safe_close_above_stdio(original_input_fd);
+ if (original_output_fd != original_input_fd)
+ safe_close_above_stdio(original_output_fd);
+ if (original_error_fd != original_input_fd && original_error_fd != original_output_fd)
+ safe_close_above_stdio(original_error_fd);
+
+ /* Close the copies we moved > 2 */
+ for (i = 0; i < 3; i++)
+ safe_close(copy_fd[i]);
+
+ /* Close our null fd, if it's > 2 */
+ safe_close_above_stdio(null_fd);
+
+ return r;
+}
+
+int fd_reopen(int fd, int flags) {
+        char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+        int r;
+
+        /* Opens the specified fd a second time with new flags, by going through /proc/self/fd/<fd>.
+         * Useful for converting an O_PATH fd into a regular one, or for turning O_RDWR fds into O_RDONLY
+         * fds. Returns the new fd on success, negative errno on failure.
+         *
+         * This does not work for sockets (as they cannot be open()ed, ever).
+         *
+         * As a side effect the file read index of the new fd starts at 0. */
+
+        xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+
+        r = open(procfs_path, flags);
+        return r < 0 ? -errno : r;
+}
+
+int read_nr_open(void) {
+        _cleanup_free_ char *line = NULL;
+        int r, value;
+
+        /* Returns the kernel's current fd limit as read from /proc/sys/fs/nr_open. If the file cannot be
+         * read or parsed the hard-coded default of current kernels (1024 * 1024, i.e. 1M) is returned
+         * instead. This call never fails. */
+
+        r = read_one_line_file("/proc/sys/fs/nr_open", &line);
+        if (r < 0) {
+                log_debug_errno(r, "Failed to read /proc/sys/fs/nr_open, ignoring: %m");
+                return 1024 * 1024;
+        }
+
+        r = safe_atoi(line, &value);
+        if (r < 0) {
+                log_debug_errno(r, "Failed to parse /proc/sys/fs/nr_open value '%s', ignoring: %m", line);
+                return 1024 * 1024;
+        }
+
+        return value;
+}
diff --git a/src/basic/fd-util.h b/src/basic/fd-util.h
new file mode 100644
index 0000000..4085a24
--- /dev/null
+++ b/src/basic/fd-util.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/socket.h>
+
+#include "macro.h"
+
+/* Make sure we can distinguish fd 0 and NULL */
+#define FD_TO_PTR(fd) INT_TO_PTR((fd)+1)
+#define PTR_TO_FD(p) (PTR_TO_INT(p)-1)
+
+int close_nointr(int fd);
+int safe_close(int fd);
+void safe_close_pair(int p[static 2]);
+
+static inline int safe_close_above_stdio(int fd) {
+        /* Leaves stdin/stdout/stderr (and negative fds) alone, but still reports the fd as invalidated by
+         * returning -1. Everything else is passed on to safe_close(). */
+        return fd >= 3 ? safe_close(fd) : -1;
+}
+
+void close_many(const int fds[], size_t n_fd);
+
+int fclose_nointr(FILE *f);
+FILE* safe_fclose(FILE *f);
+DIR* safe_closedir(DIR *f);
+
+static inline void closep(int *fd) { /* cleanup handler used by _cleanup_close_ */
+ safe_close(*fd);
+}
+
+static inline void close_pairp(int (*p)[2]) { /* cleanup handler used by _cleanup_close_pair_ */
+ safe_close_pair(*p);
+}
+
+static inline void fclosep(FILE **f) { /* cleanup handler used by _cleanup_fclose_ */
+ safe_fclose(*f);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, pclose);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DIR*, closedir);
+
+#define _cleanup_close_ _cleanup_(closep)
+#define _cleanup_fclose_ _cleanup_(fclosep)
+#define _cleanup_pclose_ _cleanup_(pclosep)
+#define _cleanup_closedir_ _cleanup_(closedirp)
+#define _cleanup_close_pair_ _cleanup_(close_pairp)
+
+int fd_nonblock(int fd, bool nonblock);
+int fd_cloexec(int fd, bool cloexec);
+
+int close_all_fds(const int except[], size_t n_except);
+
+int same_fd(int a, int b);
+
+void cmsg_close_all(struct msghdr *mh);
+
+bool fdname_is_valid(const char *s);
+
+int fd_get_path(int fd, char **ret);
+
+int move_fd(int from, int to, int cloexec);
+
+enum { /* Flags for acquire_data_fd(), each disabling one of the backends it would otherwise try */
+ ACQUIRE_NO_DEV_NULL = 1 << 0, /* don't return an fd to /dev/null for an empty data block */
+ ACQUIRE_NO_MEMFD = 1 << 1, /* don't try a memfd */
+ ACQUIRE_NO_PIPE = 1 << 2, /* don't try a pipe */
+ ACQUIRE_NO_TMPFILE = 1 << 3, /* don't try an O_TMPFILE file in /dev/shm */
+ ACQUIRE_NO_REGULAR = 1 << 4, /* don't try a regular (created, then unlinked) file in /dev/shm */
+};
+
+int acquire_data_fd(const void *data, size_t size, unsigned flags);
+
+int fd_duplicate_data_fd(int fd);
+
+/* True if r is one of the errno values indicating a disconnect. r is compared against the positive
+ * errno constants, hence must be passed as positive value.
+ * Hint: ENETUNREACH happens if we try to connect to "non-existing" special IP addresses, such as ::5 */
+/* The kernel sends e.g., EHOSTUNREACH or ENONET to userspace in some ICMP error cases.
+ * See the icmp_err_convert[] in net/ipv4/icmp.c in the kernel sources */
+#define ERRNO_IS_DISCONNECT(r) \
+ IN_SET(r, \
+ ENOTCONN, ECONNRESET, ECONNREFUSED, ECONNABORTED, EPIPE, \
+ ENETUNREACH, EHOSTUNREACH, ENOPROTOOPT, EHOSTDOWN, ENONET)
+
+/* Resource exhaustion, could be our fault or general system trouble. r must be a positive errno value. */
+#define ERRNO_IS_RESOURCE(r) \
+ IN_SET(r, ENOMEM, EMFILE, ENFILE)
+
+int fd_move_above_stdio(int fd);
+
+int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd);
+
+static inline int make_null_stdio(void) { /* Passes -1 for all three fds, i.e. asks rearrange_stdio() to connect stdin, stdout and stderr to /dev/null */
+ return rearrange_stdio(-1, -1, -1);
+}
+
+/* Like TAKE_PTR() but for file descriptors, resetting them to -1. Evaluates to the previous value of the
+ * fd. Note that the argument is expanded twice (once read, once assigned), hence it must be an lvalue
+ * expression free of side effects. */
+#define TAKE_FD(fd) \
+ ({ \
+ int _fd_ = (fd); \
+ (fd) = -1; \
+ _fd_; \
+ })
+
+int fd_reopen(int fd, int flags);
+
+int read_nr_open(void);
diff --git a/src/basic/fileio.c b/src/basic/fileio.c
new file mode 100644
index 0000000..e18b842
--- /dev/null
+++ b/src/basic/fileio.c
@@ -0,0 +1,822 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+
+#define READ_FULL_BYTES_MAX (4U*1024U*1024U)
+
+int write_string_stream_ts(
+                FILE *f,
+                const char *line,
+                WriteStringFileFlags flags,
+                struct timespec *ts) {
+
+        bool needs_nl;
+        int r;
+
+        /* Writes 'line' to the stream 'f' and flushes it. A newline is appended unless
+         * WRITE_STRING_FILE_AVOID_NEWLINE is set or the line already ends in one. With
+         * WRITE_STRING_FILE_SYNC the stream is flushed via fflush_sync_and_check(), otherwise via
+         * fflush_and_check(). If 'ts' is non-NULL both timestamps passed to futimens() for the underlying
+         * file are set to it. Returns 0 on success, negative errno on failure. */
+
+        assert(f);
+        assert(line);
+
+        if (ferror(f))
+                return -EIO;
+
+        needs_nl = !(flags & WRITE_STRING_FILE_AVOID_NEWLINE) && !endswith(line, "\n");
+
+        if (needs_nl && (flags & WRITE_STRING_FILE_DISABLE_BUFFER)) {
+                /* If STDIO buffering was disabled, then let's append the newline character to the string itself, so
+                 * that the write goes out in one go, instead of two */
+
+                line = strjoina(line, "\n");
+                needs_nl = false;
+        }
+
+        /* Reset errno before each write, and fall back to -EIO if it was not set on failure. Otherwise a
+         * failed write that leaves errno untouched would be reported as a stale, unrelated error or, if
+         * errno happens to be 0, as success. */
+        errno = 0;
+        if (fputs(line, f) == EOF)
+                return errno > 0 ? -errno : -EIO;
+
+        if (needs_nl) {
+                errno = 0;
+                if (fputc('\n', f) == EOF)
+                        return errno > 0 ? -errno : -EIO;
+        }
+
+        if (flags & WRITE_STRING_FILE_SYNC)
+                r = fflush_sync_and_check(f);
+        else
+                r = fflush_and_check(f);
+        if (r < 0)
+                return r;
+
+        if (ts) {
+                struct timespec twice[2] = {*ts, *ts};
+
+                if (futimens(fileno(f), twice) < 0)
+                        return -errno;
+        }
+
+        return 0;
+}
+
+static int write_string_file_atomic(
+                const char *fn,
+                const char *line,
+                WriteStringFileFlags flags,
+                struct timespec *ts) {
+
+        _cleanup_fclose_ FILE *f = NULL;
+        _cleanup_free_ char *tmp_path = NULL;
+        int r;
+
+        /* Writes 'line' into a temporary file obtained from fopen_temporary() for 'fn', and then
+         * rename()s it to 'fn'. If writing or renaming fails the temporary file is unlinked again.
+         * Returns 0 on success, negative errno on failure. */
+
+        assert(fn);
+        assert(line);
+
+        r = fopen_temporary(fn, &f, &tmp_path);
+        if (r < 0)
+                return r;
+
+        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+        (void) fchmod_umask(fileno(f), 0644);
+
+        r = write_string_stream_ts(f, line, flags, ts);
+        if (r >= 0) {
+                if (rename(tmp_path, fn) >= 0)
+                        return 0;
+
+                r = -errno;
+        }
+
+        /* Something went wrong, don't leave the temporary file around */
+        (void) unlink(tmp_path);
+        return r;
+}
+
+int write_string_file_ts(
+ const char *fn,
+ const char *line,
+ WriteStringFileFlags flags,
+ struct timespec *ts) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ int q, r;
+ /* Writes 'line' to the file 'fn'. With WRITE_STRING_FILE_ATOMIC (which requires
+ * WRITE_STRING_FILE_CREATE) the work is delegated to write_string_file_atomic(); in all other cases
+ * 'ts' must be NULL and the file is opened directly: via fopen(fn, "we") if WRITE_STRING_FILE_CREATE
+ * is set, via open() without O_CREAT otherwise. Returns 0 on success, negative errno on failure. If
+ * WRITE_STRING_FILE_VERIFY_ON_FAILURE is set, a failure is turned into success when verify_file()
+ * reports that the file already has the intended contents. */
+ assert(fn);
+ assert(line);
+
+ /* We don't know how to verify whether the file contents was already on-disk. */
+ assert(!((flags & WRITE_STRING_FILE_VERIFY_ON_FAILURE) && (flags & WRITE_STRING_FILE_SYNC)));
+
+ if (flags & WRITE_STRING_FILE_ATOMIC) {
+ assert(flags & WRITE_STRING_FILE_CREATE);
+
+ r = write_string_file_atomic(fn, line, flags, ts);
+ if (r < 0)
+ goto fail;
+
+ return r;
+ } else
+ assert(!ts);
+
+ if (flags & WRITE_STRING_FILE_CREATE) {
+ f = fopen(fn, "we");
+ if (!f) {
+ r = -errno;
+ goto fail;
+ }
+ } else {
+ int fd;
+
+ /* We manually build our own version of fopen(..., "we") that
+ * works without O_CREAT */
+ fd = open(fn, O_WRONLY|O_CLOEXEC|O_NOCTTY | ((flags & WRITE_STRING_FILE_NOFOLLOW) ? O_NOFOLLOW : 0));
+ if (fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ f = fdopen(fd, "w");
+ if (!f) {
+ r = -errno;
+ safe_close(fd); /* fdopen() failed, hence the fd is still ours to close */
+ goto fail;
+ }
+ }
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ if (flags & WRITE_STRING_FILE_DISABLE_BUFFER)
+ setvbuf(f, NULL, _IONBF, 0);
+
+ r = write_string_stream_ts(f, line, flags, ts);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ if (!(flags & WRITE_STRING_FILE_VERIFY_ON_FAILURE))
+ return r;
+
+ f = safe_fclose(f);
+
+ /* OK, the operation failed, but let's see if the right
+ * contents in place already. If so, eat up the error. */
+
+ q = verify_file(fn, line, !(flags & WRITE_STRING_FILE_AVOID_NEWLINE));
+ if (q <= 0) /* contents differ, or the verification itself failed: report the original error */
+ return r;
+
+ return 0;
+}
+
+int write_string_filef(
+                const char *fn,
+                WriteStringFileFlags flags,
+                const char *format, ...) {
+
+        _cleanup_free_ char *text = NULL;
+        va_list ap;
+        int k;
+
+        /* Like write_string_file(), but takes the string to write as printf()-style format string plus
+         * arguments. Returns -ENOMEM if formatting fails. */
+
+        va_start(ap, format);
+        k = vasprintf(&text, format, ap);
+        va_end(ap);
+
+        if (k < 0)
+                return -ENOMEM;
+
+        return write_string_file(fn, text, flags);
+}
+
+int read_one_line_file(const char *fn, char **line) {
+        _cleanup_fclose_ FILE *stream = NULL;
+        int r;
+
+        /* Opens the file 'fn' and reads a single line from it via read_line(), with LONG_LINE_MAX as
+         * limit. Returns 0 on success, negative errno on failure. */
+
+        assert(fn);
+        assert(line);
+
+        stream = fopen(fn, "re");
+        if (!stream)
+                return -errno;
+
+        (void) __fsetlocking(stream, FSETLOCKING_BYCALLER);
+
+        r = read_line(stream, LONG_LINE_MAX, line);
+        if (r < 0)
+                return r;
+
+        return 0;
+}
+
+int verify_file(const char *fn, const char *blob, bool accept_extra_nl) {
+        _cleanup_fclose_ FILE *stream = NULL;
+        _cleanup_free_ char *data = NULL;
+        size_t blob_len, max_read, n_read;
+
+        /* Checks whether the file 'fn' contains exactly 'blob'. If accept_extra_nl is true and the blob
+         * does not end in a newline itself, one additional trailing newline in the file is tolerated.
+         * Returns 1 if the contents match, 0 if they don't, negative errno on failure. */
+
+        assert(fn);
+        assert(blob);
+
+        blob_len = strlen(blob);
+
+        /* A blob that already ends in a newline gets no further one granted */
+        if (accept_extra_nl && endswith(blob, "\n"))
+                accept_extra_nl = false;
+
+        /* One byte more than we need, so that we notice if the file is longer than expected */
+        max_read = blob_len + accept_extra_nl + 1;
+
+        data = malloc(max_read);
+        if (!data)
+                return -ENOMEM;
+
+        stream = fopen(fn, "re");
+        if (!stream)
+                return -errno;
+
+        (void) __fsetlocking(stream, FSETLOCKING_BYCALLER);
+
+        errno = 0;
+        n_read = fread(data, 1, max_read, stream);
+        if (ferror(stream))
+                return errno > 0 ? -errno : -EIO;
+
+        /* Length mismatch? */
+        if (n_read != blob_len && n_read != blob_len + accept_extra_nl)
+                return 0;
+
+        /* Payload mismatch? */
+        if (memcmp(data, blob, blob_len) != 0)
+                return 0;
+
+        /* If there's an extra byte, it has to be the newline */
+        if (n_read > blob_len && data[blob_len] != '\n')
+                return 0;
+
+        return 1;
+}
+
+int read_full_stream(
+ FILE *f,
+ char **ret_contents,
+ size_t *ret_size) {
+
+ _cleanup_free_ char *buf = NULL;
+ struct stat st;
+ size_t n, l;
+ int fd;
+ /* Reads stream 'f' until EOF into a newly allocated buffer that is returned in *ret_contents, always
+ * with a trailing NUL byte appended. If ret_size is non-NULL the number of bytes read (not counting
+ * that NUL byte) is stored there. Returns 0 on success, -E2BIG if the data does not fit into
+ * READ_FULL_BYTES_MAX, -EBADMSG if ret_size is NULL and the data contains an embedded NUL byte, or
+ * another negative errno on failure. */
+ assert(f);
+ assert(ret_contents);
+
+ n = LINE_MAX; /* Start size */
+
+ fd = fileno(f);
+ if (fd >= 0) { /* If the FILE* object is backed by an fd (as opposed to memory or such, see fmemopen(), let's
+ * optimize our buffering) */
+
+ if (fstat(fileno(f), &st) < 0)
+ return -errno;
+
+ if (S_ISREG(st.st_mode)) {
+
+ /* Safety check */
+ if (st.st_size > READ_FULL_BYTES_MAX)
+ return -E2BIG;
+
+ /* Start with the right file size, but be prepared for files from /proc which generally report a file
+ * size of 0. Note that we increase the size to read here by one, so that the first read attempt
+ * already makes us notice the EOF. */
+ if (st.st_size > 0)
+ n = st.st_size + 1;
+ }
+ }
+
+ l = 0;
+ for (;;) {
+ char *t;
+ size_t k;
+
+ t = realloc(buf, n + 1); /* one extra byte for the trailing NUL written after the loop */
+ if (!t)
+ return -ENOMEM;
+
+ buf = t;
+ errno = 0;
+ k = fread(buf + l, 1, n - l, f);
+ if (k > 0)
+ l += k;
+
+ if (ferror(f))
+ return errno > 0 ? -errno : -EIO;
+
+ if (feof(f))
+ break;
+
+ /* We aren't expecting fread() to return a short read outside
+ * of (error && eof), assert buffer is full and enlarge buffer.
+ */
+ assert(l == n);
+
+ /* Safety check */
+ if (n >= READ_FULL_BYTES_MAX)
+ return -E2BIG;
+
+ n = MIN(n * 2, READ_FULL_BYTES_MAX);
+ }
+
+ if (!ret_size) {
+ /* Safety check: if the caller doesn't want to know the size of what we just read it will rely on the
+ * trailing NUL byte. But if there's an embedded NUL byte, then we should refuse operation as otherwise
+ * there'd be ambiguity about what we just read. */
+
+ if (memchr(buf, 0, l))
+ return -EBADMSG;
+ }
+
+ buf[l] = 0;
+ *ret_contents = TAKE_PTR(buf);
+
+ if (ret_size)
+ *ret_size = l;
+
+ return 0;
+}
+
+int read_full_file(const char *fn, char **contents, size_t *size) {
+        _cleanup_fclose_ FILE *stream = NULL;
+
+        /* Opens the file 'fn' for reading and passes it on to read_full_stream(), which fills in
+         * 'contents' and 'size'. Returns -errno if the file cannot be opened, otherwise whatever
+         * read_full_stream() returns. */
+
+        assert(fn);
+        assert(contents);
+
+        stream = fopen(fn, "re");
+        if (stream) {
+                (void) __fsetlocking(stream, FSETLOCKING_BYCALLER);
+
+                return read_full_stream(stream, contents, size);
+        }
+
+        return -errno;
+}
+
+int executable_is_script(const char *path, char **interpreter) {
+        _cleanup_free_ char *first_line = NULL;
+        char *start, *copy;
+        size_t n;
+        int r;
+
+        /* Checks whether the first line of the file 'path' is a "#!" line naming an interpreter. Returns
+         * 1 if so, storing a newly allocated copy of the interpreter (the first word after "#!") in
+         * *interpreter. Returns 0 if the file is not a script, negative errno on failure. */
+
+        assert(path);
+
+        r = read_one_line_file(path, &first_line);
+        if (r == -ENOBUFS) /* An overly long first line means this is not a script */
+                return 0;
+        if (r < 0)
+                return r;
+
+        if (!startswith(first_line, "#!"))
+                return 0;
+
+        /* The interpreter extends up to the first space or tab */
+        start = strstrip(first_line + 2);
+        n = strcspn(start, " \t");
+        if (n == 0)
+                return 0;
+
+        copy = strndup(start, n);
+        if (!copy)
+                return -ENOMEM;
+
+        *interpreter = copy;
+        return 1;
+}
+
+/**
+ * Retrieve one field from a file like /proc/self/status. pattern
+ * should not include whitespace or the delimiter (':'). pattern matches only
+ * the beginning of a line. Whitespace before ':' is skipped. Whitespace and
+ * zeros after the ':' will be skipped. field must be freed afterwards.
+ * terminator specifies the terminating characters of the field value (not
+ * included in the value).
+ *
+ * Returns 0 on success, -ENOENT if no matching line is found, and another
+ * negative errno if reading the file or allocating the result fails.
+ */
+int get_proc_field(const char *filename, const char *pattern, const char *terminator, char **field) {
+        _cleanup_free_ char *status = NULL;
+        char *t, *f;
+        size_t len;
+        int r;
+
+        assert(terminator);
+        assert(filename);
+        assert(pattern);
+        assert(field);
+
+        r = read_full_file(filename, &status, NULL);
+        if (r < 0)
+                return r;
+
+        t = status;
+
+        do {
+                bool pattern_ok;
+
+                do {
+                        t = strstr(t, pattern);
+                        if (!t)
+                                return -ENOENT;
+
+                        /* Check that pattern occurs in beginning of line. */
+                        pattern_ok = (t == status || t[-1] == '\n');
+
+                        t += strlen(pattern);
+
+                } while (!pattern_ok);
+
+                t += strspn(t, " \t");
+                if (!*t)
+                        return -ENOENT;
+
+        } while (*t != ':');
+
+        t++;
+
+        if (*t) {
+                t += strspn(t, " \t");
+
+                /* Also skip zeros, because when this is used for
+                 * capabilities, we don't want the zeros. This way the
+                 * same capability set always maps to the same string,
+                 * irrespective of the total capability set size. For
+                 * other numbers it shouldn't matter. */
+                t += strspn(t, "0");
+                /* Back off one char if there's nothing but whitespace
+                   and zeros. Note that the <ctype.h> functions require an
+                   argument in the range of unsigned char, hence the cast:
+                   plain char may be signed, and the file may contain
+                   non-ASCII bytes. */
+                if (!*t || isspace((unsigned char) *t))
+                        t--;
+        }
+
+        len = strcspn(t, terminator);
+
+        f = strndup(t, len);
+        if (!f)
+                return -ENOMEM;
+
+        *field = f;
+        return 0;
+}
+
+/* Opens the directory "name" relative to the directory fd "fd" and wraps it in a DIR stream. Extra open
+ * flags may be passed in "flags", but O_CREAT is not allowed. Returns NULL on failure. */
+DIR *xopendirat(int fd, const char *name, int flags) {
+        DIR *dir;
+        int dfd;
+
+        assert(!(flags & O_CREAT));
+
+        dfd = openat(fd, name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|flags, 0);
+        if (dfd < 0)
+                return NULL;
+
+        dir = fdopendir(dfd);
+        if (dir)
+                return dir;
+
+        /* The stream could not be set up, hence the fd is still ours to close */
+        safe_close(dfd);
+        return NULL;
+}
+
+/* Tries to fopen() "path" below each directory listed in "search" (each optionally prefixed with "root"), in
+ * order. The first file that can be opened is returned in *_f. Directories where the file does not exist
+ * are skipped; any other fopen() error aborts the search. Returns -ENOENT if no directory had the file. */
+static int search_and_fopen_internal(const char *path, const char *mode, const char *root, char **search, FILE **_f) {
+        char **dir;
+
+        assert(path);
+        assert(mode);
+        assert(_f);
+
+        if (!path_strv_resolve_uniq(search, root))
+                return -ENOMEM;
+
+        STRV_FOREACH(dir, search) {
+                _cleanup_free_ char *candidate = NULL;
+                FILE *f;
+
+                candidate = root ? strjoin(root, *dir, "/", path) : strjoin(*dir, "/", path);
+                if (!candidate)
+                        return -ENOMEM;
+
+                f = fopen(candidate, mode);
+                if (!f) {
+                        /* A missing file just means: try the next directory */
+                        if (errno != ENOENT)
+                                return -errno;
+
+                        continue;
+                }
+
+                *_f = f;
+                return 0;
+        }
+
+        return -ENOENT;
+}
+
+/* Opens "path" with fopen(). Absolute paths are opened directly; anything else is looked up in the
+ * NULL-terminated string array "search" via search_and_fopen_internal(). Returns 0 and the stream in *_f on
+ * success, a negative errno-style value otherwise. */
+int search_and_fopen(const char *path, const char *mode, const char *root, const char **search, FILE **_f) {
+        _cleanup_strv_free_ char **dirs = NULL;
+        FILE *f;
+
+        assert(path);
+        assert(mode);
+        assert(_f);
+
+        if (!path_is_absolute(path)) {
+                /* Hand a private, mutable copy of the list to the internal helper */
+                dirs = strv_copy((char**) search);
+                if (!dirs)
+                        return -ENOMEM;
+
+                return search_and_fopen_internal(path, mode, root, dirs, _f);
+        }
+
+        f = fopen(path, mode);
+        if (!f)
+                return -errno;
+
+        *_f = f;
+        return 0;
+}
+
+/* Same as search_and_fopen(), except that the search directories are passed as a string that is split into
+ * a list with strv_split_nulstr(). Absolute paths are opened directly. Returns 0 and the stream in *_f on
+ * success, a negative errno-style value otherwise. */
+int search_and_fopen_nulstr(const char *path, const char *mode, const char *root, const char *search, FILE **_f) {
+        _cleanup_strv_free_ char **s = NULL;
+
+        /* Validate arguments the same way search_and_fopen() does */
+        assert(path);
+        assert(mode);
+        assert(_f);
+
+        if (path_is_absolute(path)) {
+                FILE *f;
+
+                f = fopen(path, mode);
+                if (f) {
+                        *_f = f;
+                        return 0;
+                }
+
+                return -errno;
+        }
+
+        s = strv_split_nulstr(search);
+        if (!s)
+                return -ENOMEM;
+
+        return search_and_fopen_internal(path, mode, root, s, _f);
+}
+
+int fflush_and_check(FILE *f) {
+ assert(f);
+
+ errno = 0;
+ fflush(f);
+
+ if (ferror(f))
+ return errno > 0 ? -errno : -EIO;
+
+ return 0;
+}
+
+/* Flushes the stream like fflush_and_check(), then syncs the underlying file descriptor and, via
+ * fsync_directory_of_file(), the directory it lives in. Streams that have no file descriptor are only
+ * flushed. Returns 0 on success, a negative errno-style value on failure. */
+int fflush_sync_and_check(FILE *f) {
+        int r, fd;
+
+        assert(f);
+
+        r = fflush_and_check(f);
+        if (r < 0)
+                return r;
+
+        /* Not all streams have an fd associated (think: fmemopen()); fileno() fails for those. There is
+         * nothing to sync on disk then, so don't pass the invalid fd on to fsync(). */
+        fd = fileno(f);
+        if (fd < 0)
+                return 0;
+
+        if (fsync(fd) < 0)
+                return -errno;
+
+        r = fsync_directory_of_file(fd);
+        if (r < 0)
+                return r;
+
+        return 0;
+}
+
+/* Creates a "timestamp" file, i.e. a file whose sole content is a usec_t value formatted as ASCII decimal
+ * followed by a newline. Zero and values of USEC_INFINITY or above are refused with -ERANGE. */
+int write_timestamp_file_atomic(const char *fn, usec_t n) {
+        char text[DECIMAL_STR_MAX(n)+2];
+
+        if (!(n > 0 && n < USEC_INFINITY))
+                return -ERANGE;
+
+        xsprintf(text, USEC_FMT "\n", n);
+
+        return write_string_file(fn, text, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC);
+}
+
+/* Reads back a timestamp file: parses the first line of "fn" as an unsigned 64bit decimal number and stores
+ * it in *ret. Zero and values of USEC_INFINITY or above are refused with -ERANGE. */
+int read_timestamp_file(const char *fn, usec_t *ret) {
+        _cleanup_free_ char *text = NULL;
+        uint64_t value;
+        int r;
+
+        r = read_one_line_file(fn, &text);
+        if (r < 0)
+                return r;
+
+        r = safe_atou64(text, &value);
+        if (r < 0)
+                return r;
+
+        if (!(value > 0 && value < (uint64_t) USEC_INFINITY))
+                return -ERANGE;
+
+        *ret = (usec_t) value;
+        return 0;
+}
+
+/* Writes the string "s" with fputs(), optionally preceded by a separator. If "space" is non-NULL it shall
+ * initially point to a boolean initialized to false; it is set to true by the first call, and every later
+ * call emits the separator (a single space if none is specified) before the string. This is intended for
+ * loops where a separator shall appear between elements but not before the first one. A NULL "f" means
+ * stdout. Returns whatever fputs() returned. */
+int fputs_with_space(FILE *f, const char *s, const char *separator, bool *space) {
+        int r;
+
+        assert(s);
+
+        if (!f)
+                f = stdout;
+
+        if (space) {
+                if (*space) {
+                        r = fputs(separator ? separator : " ", f);
+                        if (r < 0)
+                                return r;
+                }
+
+                *space = true;
+        }
+
+        return fputs(s, f);
+}
+
+/* A bitmask of the EOL markers we know. read_line_full() ORs the markers seen so far into a single value, in
+ * order to decide where a (possibly multi-character) line ending stops. */
+typedef enum EndOfLineMarker {
+ EOL_NONE = 0,
+ EOL_ZERO = 1 << 0, /* \0 (aka NUL) */
+ EOL_TEN = 1 << 1, /* \n (aka NL, aka LF) */
+ EOL_THIRTEEN = 1 << 2, /* \r (aka CR) */
+} EndOfLineMarker;
+
+/* Maps a character to the end-of-line marker it represents, or EOL_NONE if it is an ordinary character. If
+ * READ_LINE_ONLY_NUL is set in "flags" only NUL counts as a marker, i.e. \n and \r are ordinary characters. */
+static EndOfLineMarker categorize_eol(char c, ReadLineFlags flags) {
+
+        /* "flags" is a bitmask, hence test for the bit rather than for equality, so that this keeps working
+         * when it is combined with other flags. */
+        if (!FLAGS_SET(flags, READ_LINE_ONLY_NUL)) {
+                if (c == '\n')
+                        return EOL_TEN;
+                if (c == '\r')
+                        return EOL_THIRTEEN;
+        }
+
+        if (c == '\0')
+                return EOL_ZERO;
+
+        return EOL_NONE;
+}
+
+/* Provides funlockfilep(), used with _cleanup_() in read_line_full() to release the stream lock again. */
+DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, funlockfile);
+
+int read_line_full(FILE *f, size_t limit, ReadLineFlags flags, char **ret) {
+ size_t n = 0, allocated = 0, count = 0;
+ _cleanup_free_ char *buffer = NULL;
+ int r;
+
+ assert(f);
+
+ /* Something like a bounded version of getline().
+ *
+ * Considers EOF, \n, \r and \0 end of line delimiters (or combinations of these), and does not include these
+ * delimiters in the string returned. Specifically, recognizes the following combinations of markers as line
+ * endings:
+ *
+ * • \n (UNIX)
+ * • \r (old MacOS)
+ * • \0 (C strings)
+ * • \n\0
+ * • \r\0
+ * • \r\n (Windows)
+ * • \n\r
+ * • \r\n\0
+ * • \n\r\0
+ *
+ * Returns the number of bytes read from the files (i.e. including delimiters — this hence usually differs from
+ * the number of characters in the returned string). When EOF is hit, 0 is returned.
+ *
+ * The input parameter limit is the maximum numbers of characters in the returned string, i.e. excluding
+ * delimiters. If the limit is hit we fail and return -ENOBUFS.
+ *
+ * If a line shall be skipped ret may be initialized as NULL. */
+
+ if (ret) {
+ /* Make sure there is room for the trailing NUL, even if the line turns out to be empty */
+ if (!GREEDY_REALLOC(buffer, allocated, 1))
+ return -ENOMEM;
+ }
+
+ {
+ /* Hold the stream lock while the line is read; it is released automatically (through funlockfilep())
+ * when "flocked" goes out of scope, i.e. also on the early returns below. */
+ _unused_ _cleanup_(funlockfilep) FILE *flocked = f;
+ EndOfLineMarker previous_eol = EOL_NONE;
+ flockfile(f);
+
+ for (;;) {
+ EndOfLineMarker eol;
+ char c;
+
+ /* n counts the characters stored so far, count all bytes consumed including delimiters */
+ if (n >= limit)
+ return -ENOBUFS;
+
+ if (count >= INT_MAX) /* We couldn't return the counter anymore as "int", hence refuse this */
+ return -ENOBUFS;
+
+ r = safe_fgetc(f, &c);
+ if (r < 0)
+ return r;
+ if (r == 0) /* EOF is definitely EOL */
+ break;
+
+ eol = categorize_eol(c, flags);
+
+ if (FLAGS_SET(previous_eol, EOL_ZERO) ||
+ (eol == EOL_NONE && previous_eol != EOL_NONE) ||
+ (eol != EOL_NONE && (previous_eol & eol) != 0)) {
+ /* Previous char was a NUL? This is not an EOL, but the previous char was? This type of
+ * EOL marker has been seen right before? In either of these three cases we are
+ * done. But first, let's put this character back in the queue. (Note that we have to
+ * cast this to (unsigned char) here as ungetc() expects a positive 'int', and if we
+ * are on an architecture where 'char' equals 'signed char' we need to ensure we don't
+ * pass a negative value here. That said, to complicate things further ungetc() is
+ * actually happy with most negative characters and implicitly casts them back to
+ * positive ones as needed, except for \xff (aka -1, aka EOF), which it refuses. What a
+ * godawful API!) */
+ assert_se(ungetc((unsigned char) c, f) != EOF);
+ break;
+ }
+
+ count++;
+
+ if (eol != EOL_NONE) {
+ /* Remember which markers this line ending consists of so far, and look at the next byte */
+ previous_eol |= eol;
+ continue;
+ }
+
+ if (ret) {
+ /* Room for this character plus the trailing NUL */
+ if (!GREEDY_REALLOC(buffer, allocated, n + 2))
+ return -ENOMEM;
+
+ buffer[n] = c;
+ }
+
+ n++;
+ }
+ }
+
+ if (ret) {
+ buffer[n] = 0;
+
+ /* Hand ownership of the buffer to the caller */
+ *ret = TAKE_PTR(buffer);
+ }
+
+ return (int) count;
+}
+
+/* A safer version of plain fgetc(): read errors are propagated as negative errno-style values, and the EOF
+ * condition is reported separately from the byte read, which avoids the signed/unsigned confusion of
+ * fgetc()'s return value. Returns 1 if a byte was read (stored in *ret), 0 on EOF (*ret is set to 0). "ret"
+ * may be NULL if the byte is of no interest. */
+int safe_fgetc(FILE *f, char *ret) {
+        int c;
+
+        assert(f);
+
+        errno = 0;
+        c = fgetc(f);
+
+        if (c != EOF) {
+                if (ret)
+                        *ret = c;
+
+                return 1;
+        }
+
+        /* EOF is returned both on error and at the end of the file, tell the two apart */
+        if (ferror(f))
+                return errno > 0 ? -errno : -EIO;
+
+        if (ret)
+                *ret = 0;
+
+        return 0;
+}
diff --git a/src/basic/fileio.h b/src/basic/fileio.h
new file mode 100644
index 0000000..53e3f4e
--- /dev/null
+++ b/src/basic/fileio.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <dirent.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "time-util.h"
+
+#define LONG_LINE_MAX (1U*1024U*1024U)
+
+/* Flags accepted by write_string_stream_ts(), write_string_file_ts() and their wrappers. Each value is a
+ * distinct bit, so that several flags may be ORed together. */
+typedef enum {
+ WRITE_STRING_FILE_CREATE = 1 << 0,
+ WRITE_STRING_FILE_ATOMIC = 1 << 1,
+ WRITE_STRING_FILE_AVOID_NEWLINE = 1 << 2,
+ WRITE_STRING_FILE_VERIFY_ON_FAILURE = 1 << 3,
+ WRITE_STRING_FILE_SYNC = 1 << 4,
+ WRITE_STRING_FILE_DISABLE_BUFFER = 1 << 5,
+ WRITE_STRING_FILE_NOFOLLOW = 1 << 6,
+
+ /* And before you wonder, why write_string_file_atomic_label_ts() is a separate function instead of just one
+ more flag here: it's about linking: we don't want to pull -lselinux into all users of write_string_file()
+ and friends. */
+
+} WriteStringFileFlags;
+
+int write_string_stream_ts(FILE *f, const char *line, WriteStringFileFlags flags, struct timespec *ts);
+/* Convenience wrapper around write_string_stream_ts() that passes NULL for the timestamp. */
+static inline int write_string_stream(FILE *f, const char *line, WriteStringFileFlags flags) {
+ return write_string_stream_ts(f, line, flags, NULL);
+}
+int write_string_file_ts(const char *fn, const char *line, WriteStringFileFlags flags, struct timespec *ts);
+/* Convenience wrapper around write_string_file_ts() that passes NULL for the timestamp. */
+static inline int write_string_file(const char *fn, const char *line, WriteStringFileFlags flags) {
+ return write_string_file_ts(fn, line, flags, NULL);
+}
+
+int write_string_filef(const char *fn, WriteStringFileFlags flags, const char *format, ...) _printf_(3, 4);
+
+int read_one_line_file(const char *fn, char **line);
+int read_full_file(const char *fn, char **contents, size_t *size);
+int read_full_stream(FILE *f, char **contents, size_t *size);
+
+int verify_file(const char *fn, const char *blob, bool accept_extra_nl);
+
+int executable_is_script(const char *path, char **interpreter);
+
+int get_proc_field(const char *filename, const char *pattern, const char *terminator, char **field);
+
+DIR *xopendirat(int dirfd, const char *name, int flags);
+
+int search_and_fopen(const char *path, const char *mode, const char *root, const char **search, FILE **_f);
+int search_and_fopen_nulstr(const char *path, const char *mode, const char *root, const char *search, FILE **_f);
+
+int fflush_and_check(FILE *f);
+int fflush_sync_and_check(FILE *f);
+
+int write_timestamp_file_atomic(const char *fn, usec_t n);
+int read_timestamp_file(const char *fn, usec_t *ret);
+
+int fputs_with_space(FILE *f, const char *s, const char *separator, bool *space);
+
+/* Flags for read_line_full(). */
+typedef enum ReadLineFlags {
+ READ_LINE_ONLY_NUL = 1 << 0, /* Only \0 ends a line; \n and \r are treated as ordinary characters */
+} ReadLineFlags;
+
+int read_line_full(FILE *f, size_t limit, ReadLineFlags flags, char **ret);
+
+/* Reads one line with read_line_full(), with no flags set. */
+static inline int read_line(FILE *f, size_t limit, char **ret) {
+ return read_line_full(f, limit, 0, ret);
+}
+
+/* Reads one string with read_line_full(), with READ_LINE_ONLY_NUL set. */
+static inline int read_nul_string(FILE *f, size_t limit, char **ret) {
+ return read_line_full(f, limit, READ_LINE_ONLY_NUL, ret);
+}
+
+int safe_fgetc(FILE *f, char *ret);
diff --git a/src/basic/format-util.h b/src/basic/format-util.h
new file mode 100644
index 0000000..dece5d3
--- /dev/null
+++ b/src/basic/format-util.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+
+/* printf() format helpers for system types whose width differs between platforms. The SIZEOF_* macros are
+ * not defined in this header — presumably the build system provides them. Unsupported widths are a
+ * compile-time error. The *_FMT macros include the leading "%", the PRI* macros do not. */
+
+/* pid_t (signed) */
+#if SIZEOF_PID_T == 4
+# define PID_PRI PRIi32
+#elif SIZEOF_PID_T == 2
+# define PID_PRI PRIi16
+#else
+# error Unknown pid_t size
+#endif
+#define PID_FMT "%" PID_PRI
+
+/* uid_t (unsigned) */
+#if SIZEOF_UID_T == 4
+# define UID_FMT "%" PRIu32
+#elif SIZEOF_UID_T == 2
+# define UID_FMT "%" PRIu16
+#else
+# error Unknown uid_t size
+#endif
+
+/* gid_t (unsigned) */
+#if SIZEOF_GID_T == 4
+# define GID_FMT "%" PRIu32
+#elif SIZEOF_GID_T == 2
+# define GID_FMT "%" PRIu16
+#else
+# error Unknown gid_t size
+#endif
+
+/* time_t (signed) */
+#if SIZEOF_TIME_T == 8
+# define PRI_TIME PRIi64
+#elif SIZEOF_TIME_T == 4
+# define PRI_TIME "li"
+#else
+# error Unknown time_t size
+#endif
+
+/* 64bit on the x86-64 ILP32 ABI, "long" everywhere else. NOTE(review): judging by the name this is meant
+ * for the fields of struct timex — confirm against the users. */
+#if defined __x86_64__ && defined __ILP32__
+# define PRI_TIMEX PRIi64
+#else
+# define PRI_TIMEX "li"
+#endif
+
+/* rlim_t (unsigned) */
+#if SIZEOF_RLIM_T == 8
+# define RLIM_FMT "%" PRIu64
+#elif SIZEOF_RLIM_T == 4
+# define RLIM_FMT "%" PRIu32
+#else
+# error Unknown rlim_t size
+#endif
+
+/* dev_t (unsigned) */
+#if SIZEOF_DEV_T == 8
+# define DEV_FMT "%" PRIu64
+#elif SIZEOF_DEV_T == 4
+# define DEV_FMT "%" PRIu32
+#else
+# error Unknown dev_t size
+#endif
+
+/* ino_t (unsigned) */
+#if SIZEOF_INO_T == 8
+# define INO_FMT "%" PRIu64
+#elif SIZEOF_INO_T == 4
+# define INO_FMT "%" PRIu32
+#else
+# error Unknown ino_t size
+#endif
diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c
new file mode 100644
index 0000000..f25bf2c
--- /dev/null
+++ b/src/basic/fs-util.c
@@ -0,0 +1,1358 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <linux/magic.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+/* Removes "path" with unlink(), returning 0 on success and -errno on failure. The function runs under
+ * PROTECT_ERRNO — presumably so that the caller's errno is left untouched, as the name suggests. */
+int unlink_noerrno(const char *path) {
+        PROTECT_ERRNO;
+
+        return unlink(path) < 0 ? -errno : 0;
+}
+
+/* Removes the parent directories of "path", innermost first, with rmdir(). The walk ends successfully as
+ * soon as "stop" starts with the parent under consideration (in the sense of path_startswith()), or when no
+ * parent is left. Parents that do not exist are ignored; any other rmdir() failure is returned as -errno. */
+int rmdir_parents(const char *path, const char *stop) {
+        size_t len;
+
+        assert(path);
+        assert(stop);
+
+        len = strlen(path);
+
+        /* Skip trailing slashes */
+        while (len > 0 && path[len-1] == '/')
+                len--;
+
+        for (;;) {
+                _cleanup_free_ char *parent = NULL;
+
+                /* Drop the last component ... */
+                while (len > 0 && path[len-1] != '/')
+                        len--;
+
+                /* ... and the slashes in front of it */
+                while (len > 0 && path[len-1] == '/')
+                        len--;
+
+                if (len == 0)
+                        break;
+
+                parent = strndup(path, len);
+                if (!parent)
+                        return -ENOMEM;
+
+                if (path_startswith(stop, parent))
+                        return 0;
+
+                if (rmdir(parent) < 0 && errno != ENOENT)
+                        return -errno;
+        }
+
+        return 0;
+}
+
+/* Renames oldpath (relative to olddirfd) to newpath (relative to newdirfd) without replacing an existing
+ * newpath. Three strategies are tried in turn, each one only if the previous one is unsupported:
+ * renameat2(RENAME_NOREPLACE), then linkat()+unlinkat(), then a racy existence check followed by a plain
+ * renameat(). Returns 0 on success and -errno on failure; the last fallback returns -EEXIST if newpath
+ * already exists. */
+int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) {
+ int r;
+
+ /* Try the ideal approach first */
+ if (renameat2(olddirfd, oldpath, newdirfd, newpath, RENAME_NOREPLACE) >= 0)
+ return 0;
+
+ /* renameat2() exists since Linux 3.15, btrfs and FAT added support for it later. If it is not implemented,
+ * fall back to a different method. */
+ if (!IN_SET(errno, EINVAL, ENOSYS, ENOTTY))
+ return -errno;
+
+ /* Let's try to use linkat()+unlinkat() as fallback. This doesn't work on directories and on some file systems
+ * that do not support hard links (such as FAT, most prominently), but for files it's pretty close to what we
+ * want — though not atomic (i.e. for a short period both the new and the old filename will exist). */
+ if (linkat(olddirfd, oldpath, newdirfd, newpath, 0) >= 0) {
+
+ if (unlinkat(olddirfd, oldpath, 0) < 0) {
+ r = -errno; /* Backup errno before the following unlinkat() alters it */
+ /* Roll back: remove the new name again, so that we don't leave both names around */
+ (void) unlinkat(newdirfd, newpath, 0);
+ return r;
+ }
+
+ return 0;
+ }
+
+ if (!IN_SET(errno, EINVAL, ENOSYS, ENOTTY, EPERM)) /* FAT returns EPERM on link()… */
+ return -errno;
+
+ /* OK, neither RENAME_NOREPLACE nor linkat()+unlinkat() worked. Let's then fallback to the racy TOCTOU
+ * vulnerable accessat(F_OK) check followed by classic, replacing renameat(), we have nothing better. */
+
+ if (faccessat(newdirfd, newpath, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
+ return -EEXIST;
+ if (errno != ENOENT)
+ return -errno;
+
+ if (renameat(olddirfd, oldpath, newdirfd, newpath) < 0)
+ return -errno;
+
+ return 0;
+}
+
+/* Reads the target of the symlink "p" (relative to the directory fd "fd") into a newly allocated,
+ * NUL-terminated string stored in *ret. The buffer starts out at FILENAME_MAX+1 bytes and is doubled until
+ * the target fits. Returns 0 on success, -ENOMEM or -errno on failure. */
+int readlinkat_malloc(int fd, const char *p, char **ret) {
+        size_t size = FILENAME_MAX+1;
+
+        assert(p);
+        assert(ret);
+
+        for (;;) {
+                _cleanup_free_ char *buf = NULL;
+                ssize_t n;
+
+                buf = new(char, size);
+                if (!buf)
+                        return -ENOMEM;
+
+                n = readlinkat(fd, p, buf, size-1);
+                if (n < 0)
+                        return -errno;
+
+                /* If the buffer was filled completely the target may have been truncated; retry with a
+                 * larger buffer in that case. */
+                if ((size_t) n < size-1) {
+                        buf[n] = 0;
+                        *ret = TAKE_PTR(buf);
+                        return 0;
+                }
+
+                size *= 2;
+        }
+}
+
+/* Like readlinkat_malloc(), but resolves "p" relative to the current working directory (AT_FDCWD). */
+int readlink_malloc(const char *p, char **ret) {
+ return readlinkat_malloc(AT_FDCWD, p, ret);
+}
+
+/* Reads the symlink "p" and stores a newly allocated copy of the basename() of its target in *ret. Returns
+ * 0 on success, -ENOENT if basename() yields NULL, -ENOMEM on allocation failure, or the error of
+ * readlink_malloc(). */
+int readlink_value(const char *p, char **ret) {
+        _cleanup_free_ char *target = NULL;
+        char *name, *copy;
+        int r;
+
+        r = readlink_malloc(p, &target);
+        if (r < 0)
+                return r;
+
+        name = basename(target);
+        if (!name)
+                return -ENOENT;
+
+        /* "name" points into "target", which is freed on return, hence duplicate it */
+        copy = strdup(name);
+        if (!copy)
+                return -ENOMEM;
+
+        *ret = copy;
+        return 0;
+}
+
+/* Reads the symlink "p" and passes its target through file_in_same_dir(p, target); the resulting newly
+ * allocated path is stored in *r. Returns 0 on success, -ENOMEM if file_in_same_dir() fails, or the error
+ * of readlink_malloc(). */
+int readlink_and_make_absolute(const char *p, char **r) {
+        _cleanup_free_ char *link_target = NULL;
+        char *resolved;
+        int q;
+
+        assert(p);
+        assert(r);
+
+        q = readlink_malloc(p, &link_target);
+        if (q < 0)
+                return q;
+
+        resolved = file_in_same_dir(p, link_target);
+        if (!resolved)
+                return -ENOMEM;
+
+        *r = resolved;
+        return 0;
+}
+
+/* Changes access mode and ownership of "path". MODE_INVALID, UID_INVALID and GID_INVALID leave the
+ * respective property alone. If "mode" carries file type bits (S_IFMT) they must match the type of the
+ * file, otherwise -EINVAL is returned. Returns 0 on success, -errno on failure. */
+int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid) {
+        char fd_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+        _cleanup_close_ int fd = -1;
+
+        assert(path);
+
+        /* Under the assumption that we are running privileged we first change the access mode and only then
+         * hand out ownership to avoid a window where access is too open. */
+
+        /* Pin the file with an O_PATH fd first, as precaution to change mode/owner on the same file */
+        fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+        if (fd < 0)
+                return -errno;
+
+        xsprintf(fd_path, "/proc/self/fd/%i", fd);
+
+        if (mode != MODE_INVALID) {
+                mode_t type = mode & S_IFMT;
+
+                if (type != 0) {
+                        struct stat st;
+
+                        if (stat(fd_path, &st) < 0)
+                                return -errno;
+
+                        if (type != (st.st_mode & S_IFMT))
+                                return -EINVAL;
+                }
+
+                if (chmod(fd_path, mode & 07777) < 0)
+                        return -errno;
+        }
+
+        if ((uid != UID_INVALID || gid != GID_INVALID) && chown(fd_path, uid, gid) < 0)
+                return -errno;
+
+        return 0;
+}
+
+/* Changes access mode and ownership of the file referenced by "fd". MODE_INVALID, UID_INVALID and
+ * GID_INVALID leave the respective property alone. If "mode" carries file type bits (S_IFMT) they must
+ * match the type of the file, otherwise -EINVAL is returned. Returns 0 on success, -errno on failure. */
+int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid) {
+        /* Under the assumption that we are running privileged we first change the access mode and only then hand out
+         * ownership to avoid a window where access is too open. */
+
+        if (mode != MODE_INVALID) {
+
+                if ((mode & S_IFMT) != 0) {
+                        struct stat st;
+
+                        if (fstat(fd, &st) < 0)
+                                return -errno;
+
+                        if ((mode & S_IFMT) != (st.st_mode & S_IFMT))
+                                return -EINVAL;
+                }
+
+                /* Only strip the file type bits. The setuid, setgid and sticky bits must be passed on, the
+                 * same way chmod_and_chown() does, or they would silently get lost. */
+                if (fchmod(fd, mode & 07777) < 0)
+                        return -errno;
+        }
+
+        if (uid != UID_INVALID || gid != GID_INVALID)
+                if (fchown(fd, uid, gid) < 0)
+                        return -errno;
+
+        return 0;
+}
+
+/* Applies the access mode "m" to "fd" with fchmod(), after masking out the bits set in the process' umask.
+ * Returns 0 on success, -errno on failure. */
+int fchmod_umask(int fd, mode_t m) {
+        mode_t saved;
+        int r = 0;
+
+        /* umask() can only be queried by setting it, hence set a temporary value and restore it below */
+        saved = umask(0777);
+
+        if (fchmod(fd, m & (~saved)) < 0)
+                r = -errno;
+
+        umask(saved);
+
+        return r;
+}
+
+/* Changes the access mode of the file referenced by "fd" by running chmod() on its /proc/self/fd/ path.
+ * This also works for fds that might have been opened with O_PATH; fchmodat() is no alternative, as it
+ * doesn't have the AT_EMPTY_PATH flag like fchownat() does. Returns 0 on success, -errno on failure. */
+int fchmod_opath(int fd, mode_t m) {
+        char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+
+        xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+
+        return chmod(procfs_path, m) < 0 ? -errno : 0;
+}
+
+/* Logs warnings about questionable access modes of the configuration file open as "fd": executable bits,
+ * world-writability and — only when running as PID 1 — a lack of read permission for group or others.
+ * "path" is only used in the messages. Returns -errno if fstat() fails, 0 otherwise. */
+int fd_warn_permissions(const char *path, int fd) {
+        struct stat st;
+        mode_t m;
+
+        if (fstat(fd, &st) < 0)
+                return -errno;
+
+        m = st.st_mode;
+
+        if ((m & 0111) != 0)
+                log_warning("Configuration file %s is marked executable. Please remove executable permission bits. Proceeding anyway.", path);
+
+        if ((m & 0002) != 0)
+                log_warning("Configuration file %s is marked world-writable. Please remove world writability permission bits. Proceeding anyway.", path);
+
+        if (getpid_cached() == 1 && (m & 0044) != 0044)
+                log_warning("Configuration file %s is marked world-inaccessible. This has no effect as configuration data is accessible via APIs without restrictions. Proceeding anyway.", path);
+
+        return 0;
+}
+
+/* Creates "path" as a regular file if it is missing, then adjusts access mode, ownership and timestamps of
+ * the node.
+ *
+ * parents: if true, mkdir_parents() is called for the path first (failures are ignored)
+ * stamp: access/modification time to set; with USEC_INFINITY utimensat() is called with NULL times
+ * uid, gid: new ownership, only applied if at least one of the two is valid
+ * mode: new access mode, not applied if MODE_INVALID; a newly created file gets 0644 if this is 0 or
+ * MODE_INVALID
+ *
+ * Returns 0 on success, -errno otherwise. */
+int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode) {
+ char fdpath[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ _cleanup_close_ int fd = -1;
+ int r, ret = 0;
+
+ assert(path);
+
+ /* Note that touch_file() does not follow symlinks: if invoked on an existing symlink, then it is the symlink
+ * itself which is updated, not its target
+ *
+ * Returns the first error we encounter, but tries to apply as much as possible. */
+
+ if (parents)
+ (void) mkdir_parents(path, 0755);
+
+ /* Initially, we try to open the node with O_PATH, so that we get a reference to the node. This is useful in
+ * case the path refers to an existing device or socket node, as we can open it successfully in all cases, and
+ * won't trigger any driver magic or so. */
+ fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* if the node doesn't exist yet, we create it, but with O_EXCL, so that we only create a regular file
+ * here, and nothing else */
+ fd = open(path, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, IN_SET(mode, 0, MODE_INVALID) ? 0644 : mode);
+ if (fd < 0)
+ return -errno;
+ }
+
+ /* Let's make a path from the fd, and operate on that. With this logic, we can adjust the access mode,
+ * ownership and time of the file node in all cases, even if the fd refers to an O_PATH object — which is
+ * something fchown(), fchmod(), futimensat() don't allow. */
+ xsprintf(fdpath, "/proc/self/fd/%i", fd);
+
+ /* From here on failures are recorded in "ret" (first one wins), and the remaining steps are still tried */
+ if (mode != MODE_INVALID)
+ if (chmod(fdpath, mode) < 0)
+ ret = -errno;
+
+ if (uid_is_valid(uid) || gid_is_valid(gid))
+ if (chown(fdpath, uid, gid) < 0 && ret >= 0)
+ ret = -errno;
+
+ if (stamp != USEC_INFINITY) {
+ struct timespec ts[2];
+
+ /* Use the same timestamp for both array entries */
+ timespec_store(&ts[0], stamp);
+ ts[1] = ts[0];
+ r = utimensat(AT_FDCWD, fdpath, ts, 0);
+ } else
+ r = utimensat(AT_FDCWD, fdpath, NULL, 0);
+ if (r < 0 && ret >= 0)
+ return -errno;
+
+ return ret;
+}
+
+/* Shortcut for touch_file(): no parent directories are created, no explicit timestamp is passed
+ * (USEC_INFINITY), and ownership and access mode are left unspecified. */
+int touch(const char *path) {
+ return touch_file(path, false, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
+}
+
+/* Creates the symlink "to" pointing to "from", treating the case where "to" already is a symlink with
+ * exactly that target as success. If make_relative is true the target is first converted into a path
+ * relative to the directory of "to". Returns 0 on success, -EEXIST if "to" exists but is something else,
+ * and other negative errno-style values on failure. */
+int symlink_idempotent(const char *from, const char *to, bool make_relative) {
+        _cleanup_free_ char *relpath = NULL, *existing = NULL;
+        int r;
+
+        assert(from);
+        assert(to);
+
+        if (make_relative) {
+                _cleanup_free_ char *parent = NULL;
+
+                parent = dirname_malloc(to);
+                if (!parent)
+                        return -ENOMEM;
+
+                r = path_make_relative(parent, from, &relpath);
+                if (r < 0)
+                        return r;
+
+                from = relpath;
+        }
+
+        if (symlink(from, to) >= 0)
+                return 0;
+
+        if (errno != EEXIST)
+                return -errno;
+
+        /* Something is in the way. Check whether it happens to be the very symlink we wanted to create. */
+        r = readlink_malloc(to, &existing);
+        if (r == -EINVAL) /* Not a symlink? In that case return the original error we encountered: -EEXIST */
+                return -EEXIST;
+        if (r < 0) /* Any other error? In that case propagate it as is */
+                return r;
+
+        /* Not the symlink we want it to be? In that case, propagate the original -EEXIST */
+        return streq(existing, from) ? 0 : -EEXIST;
+}
+
+/* Creates the symlink "to" pointing to "from" by creating it under a temporary name generated by
+ * tempfn_random() and then renaming it into place with rename(). Returns 0 on success, a negative
+ * errno-style value on failure. */
+int symlink_atomic(const char *from, const char *to) {
+        _cleanup_free_ char *tmp = NULL;
+        int r;
+
+        assert(from);
+        assert(to);
+
+        r = tempfn_random(to, NULL, &tmp);
+        if (r < 0)
+                return r;
+
+        if (symlink(from, tmp) < 0)
+                return -errno;
+
+        if (rename(tmp, to) >= 0)
+                return 0;
+
+        /* Renaming failed: remove the temporary symlink again, then report the rename() error */
+        unlink_noerrno(tmp);
+        return -errno;
+}
+
+/* Creates the file system node "path" by creating it with mknod() under a temporary name generated by
+ * tempfn_random() and then renaming it into place with rename(). Returns 0 on success, a negative
+ * errno-style value on failure. */
+int mknod_atomic(const char *path, mode_t mode, dev_t dev) {
+        _cleanup_free_ char *tmp = NULL;
+        int r;
+
+        assert(path);
+
+        r = tempfn_random(path, NULL, &tmp);
+        if (r < 0)
+                return r;
+
+        if (mknod(tmp, mode, dev) < 0)
+                return -errno;
+
+        if (rename(tmp, path) >= 0)
+                return 0;
+
+        /* Renaming failed: remove the temporary node again, then report the rename() error */
+        unlink_noerrno(tmp);
+        return -errno;
+}
+
+/* Creates the FIFO "path" by creating it with mkfifo() under a temporary name generated by tempfn_random()
+ * and then renaming it into place with rename(). Returns 0 on success, a negative errno-style value on
+ * failure. */
+int mkfifo_atomic(const char *path, mode_t mode) {
+        _cleanup_free_ char *tmp = NULL;
+        int r;
+
+        assert(path);
+
+        r = tempfn_random(path, NULL, &tmp);
+        if (r < 0)
+                return r;
+
+        if (mkfifo(tmp, mode) < 0)
+                return -errno;
+
+        if (rename(tmp, path) >= 0)
+                return 0;
+
+        /* Renaming failed: remove the temporary FIFO again, then report the rename() error */
+        unlink_noerrno(tmp);
+        return -errno;
+}
+
+/* Like mkfifo_atomic(), but a relative "path" is interpreted relative to the directory fd "dirfd": the
+ * FIFO is created under a random temporary name inside that directory and then renamed into place.
+ * Absolute paths are handed to mkfifo_atomic(). Returns 0 on success, a negative errno-style value on
+ * failure. */
+int mkfifoat_atomic(int dirfd, const char *path, mode_t mode) {
+        _cleanup_free_ char *t = NULL;
+        int r;
+
+        assert(path);
+
+        if (path_is_absolute(path))
+                return mkfifo_atomic(path, mode);
+
+        /* We're only interested in the (random) filename. */
+        r = tempfn_random_child("", NULL, &t);
+        if (r < 0)
+                return r;
+
+        if (mkfifoat(dirfd, t, mode) < 0)
+                return -errno;
+
+        if (renameat(dirfd, t, dirfd, path) < 0) {
+                r = -errno; /* Backup errno before the following unlinkat() alters it */
+
+                /* The temporary FIFO lives in dirfd, hence it has to be removed relative to dirfd too, and
+                 * not relative to the current working directory. */
+                (void) unlinkat(dirfd, t, 0);
+                return r;
+        }
+
+        return 0;
+}
+
+/* Enumerates the entries of the directory "path" that dirent_is_file() accepts. Returns their number, or a
+ * negative errno-style value on failure. If "list" is non-NULL a newly allocated, NULL-terminated array of
+ * the entry names is stored in it as well. */
+int get_files_in_directory(const char *path, char ***list) {
+        _cleanup_closedir_ DIR *dir = NULL;
+        struct dirent *entry;
+        size_t allocated = 0, count = 0;
+        _cleanup_strv_free_ char **names = NULL;
+
+        assert(path);
+
+        dir = opendir(path);
+        if (!dir)
+                return -errno;
+
+        FOREACH_DIRENT_ALL(entry, dir, return -errno) {
+                dirent_ensure_type(dir, entry);
+
+                if (!dirent_is_file(entry))
+                        continue;
+
+                if (!list) {
+                        /* The caller only wants the number of files */
+                        count++;
+                        continue;
+                }
+
+                /* one extra slot is needed for the terminating NULL */
+                if (!GREEDY_REALLOC(names, allocated, count + 2))
+                        return -ENOMEM;
+
+                names[count] = strdup(entry->d_name);
+                if (!names[count])
+                        return -ENOMEM;
+
+                names[++count] = NULL;
+        }
+
+        if (list)
+                *list = TAKE_PTR(names);
+
+        return count;
+}
+
+/* Looks for a usable temporary directory in the environment variables $TMPDIR, $TEMP and $TMP, in this
+ * order. A value is usable if it is an absolute, normalized path for which is_dir() succeeds.
+ *
+ * Returns 1 and stores the value in *ret_path (this is the string returned by secure_getenv(), not a copy)
+ * if a usable one is found. Otherwise returns the first error encountered, or 0 with *ret_path set to NULL
+ * if none of the variables is set. */
+static int getenv_tmp_dir(const char **ret_path) {
+ const char *n;
+ int r, ret = 0;
+
+ assert(ret_path);
+
+ /* We use the same order of environment variables python uses in tempfile.gettempdir():
+ * https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir */
+ FOREACH_STRING(n, "TMPDIR", "TEMP", "TMP") {
+ const char *e;
+
+ e = secure_getenv(n);
+ if (!e)
+ continue;
+ if (!path_is_absolute(e)) {
+ r = -ENOTDIR;
+ goto next;
+ }
+ if (!path_is_normalized(e)) {
+ r = -EPERM;
+ goto next;
+ }
+
+ r = is_dir(e, true);
+ if (r < 0)
+ goto next;
+ if (r == 0) {
+ r = -ENOTDIR;
+ goto next;
+ }
+
+ *ret_path = e;
+ return 1;
+
+ next:
+ /* Remember first error, to make this more debuggable */
+ if (ret >= 0)
+ ret = r;
+ }
+
+ if (ret < 0)
+ return ret;
+
+ /* None of the variables was set at all */
+ *ret_path = NULL;
+ return ret;
+}
+
+/* Determines the temporary directory to use: the one configured in the environment (see getenv_tmp_dir())
+ * if there is a usable one, otherwise the default "def", provided is_dir() accepts it. Returns 0 and stores
+ * the directory in *ret on success. If neither is usable the error from the environment lookup is returned
+ * if there was one, otherwise the error for the default (-ENOTDIR if it is not a directory). */
+static int tmp_dir_internal(const char *def, const char **ret) {
+        const char *e;
+        int r, k;
+
+        assert(def);
+        assert(ret);
+
+        r = getenv_tmp_dir(&e);
+        if (r > 0) {
+                *ret = e;
+                return 0;
+        }
+
+        k = is_dir(def, true);
+        if (k > 0) {
+                *ret = def;
+                return 0;
+        }
+
+        if (k == 0)
+                k = -ENOTDIR;
+
+        return r < 0 ? r : k;
+}
+
+/* Returns 0 and stores the directory in *ret on success, a negative errno-style value otherwise (see
+ * tmp_dir_internal()). */
+int var_tmp_dir(const char **ret) {
+
+ /* Returns the location for "larger" temporary files, that is backed by physical storage if available, and thus
+ * even might survive a boot: /var/tmp. If $TMPDIR (or related environment variables) are set, its value is
+ * returned preferably however. Note that both this function and tmp_dir() below are affected by $TMPDIR,
+ * making it a variable that overrides all temporary file storage locations. */
+
+ return tmp_dir_internal("/var/tmp", ret);
+}
+
+/* Returns 0 and stores the directory in *ret on success, a negative errno-style value otherwise (see
+ * tmp_dir_internal()). */
+int tmp_dir(const char **ret) {
+
+ /* Similar to var_tmp_dir() above, but returns the location for "smaller" temporary files, which is usually
+ * backed by an in-memory file system: /tmp. */
+
+ return tmp_dir_internal("/tmp", ret);
+}
+
+int unlink_or_warn(const char *filename) {
+ if (unlink(filename) < 0 && errno != ENOENT)
+ /* If the file doesn't exist and the fs simply was read-only (in which
+ * case unlink() returns EROFS even if the file doesn't exist), don't
+ * complain */
+ if (errno != EROFS || access(filename, F_OK) >= 0)
+ return log_error_errno(errno, "Failed to remove \"%s\": %m", filename);
+
+ return 0;
+}
+
+/* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by the
+ * fd "what" (through its /proc/self/fd/ path). "fd" is the inotify fd. Returns the value of
+ * inotify_add_watch() on success, -errno on failure. */
+int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
+        char path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+        int wd;
+
+        xsprintf(path, "/proc/self/fd/%i", what);
+
+        wd = inotify_add_watch(fd, path, mask);
+
+        return wd < 0 ? -errno : wd;
+}
+
+static bool unsafe_transition(const struct stat *a, const struct stat *b) {
+ /* Returns true if the transition from a to b is unsafe, i.e. if a is not owned by root and b is owned by a
+ * different user than a. The idea is that we never transition from unprivileged to privileged files or
+ * directories. Why bother? So that unprivileged code can't symlink to privileged files making us believe we
+ * read something safe even though it isn't safe in the specific context we open it in. */
+
+ if (a->st_uid == 0) /* Transitioning from privileged to unprivileged is always fine */
+ return false;
+
+ return a->st_uid != b->st_uid; /* Otherwise we need to stay within the same UID */
+}
+
+/* Reports an unsafe path transition from the object referenced by fd "a" to the one referenced by fd "b",
+ * noticed while canonicalizing "path". Always returns -ENOLINK; a warning is logged only if CHASE_WARN is
+ * set in "flags". */
+static int log_unsafe_transition(int a, int b, const char *path, unsigned flags) {
+        _cleanup_free_ char *n1 = NULL, *n2 = NULL;
+
+        if (!FLAGS_SET(flags, CHASE_WARN))
+                return -ENOLINK;
+
+        /* Looking up the paths is best effort only, hence n1 and n2 may still be NULL afterwards. Passing
+         * NULL for "%s" is undefined behaviour, so substitute a placeholder. */
+        (void) fd_get_path(a, &n1);
+        (void) fd_get_path(b, &n2);
+
+        return log_warning_errno(SYNTHETIC_ERRNO(ENOLINK),
+                                 "Detected unsafe path transition %s %s %s during canonicalization of %s.",
+                                 n1 ? n1 : "n/a", special_glyph(SPECIAL_GLYPH_ARROW), n2 ? n2 : "n/a", path);
+}
+
+/* Reports that the autofs mount point referenced by "fd" was hit while canonicalizing "path". Always
+ * returns -EREMOTE; a warning is logged only if CHASE_WARN is set in "flags". */
+static int log_autofs_mount_point(int fd, const char *path, unsigned flags) {
+        _cleanup_free_ char *n1 = NULL;
+
+        if (!FLAGS_SET(flags, CHASE_WARN))
+                return -EREMOTE;
+
+        /* Looking up the path is best effort only, hence n1 may still be NULL afterwards. Passing NULL for
+         * "%s" is undefined behaviour, so substitute a placeholder. */
+        (void) fd_get_path(fd, &n1);
+
+        return log_warning_errno(SYNTHETIC_ERRNO(EREMOTE),
+                                 "Detected autofs mount point %s during canonicalization of %s.",
+                                 n1 ? n1 : "n/a", path);
+}
+
+int chase_symlinks(const char *path, const char *original_root, unsigned flags, char **ret) {
+ _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL;
+ _cleanup_close_ int fd = -1;
+ unsigned max_follow = CHASE_SYMLINKS_MAX; /* how many symlinks to follow before giving up and returning ELOOP */
+ struct stat previous_stat;
+ bool exists = true;
+ char *todo;
+ int r;
+
+ assert(path);
+
+ /* Either the file may be missing, or we return an fd to the final object, but both make no sense */
+ if (FLAGS_SET(flags, CHASE_NONEXISTENT | CHASE_OPEN))
+ return -EINVAL;
+
+ if (FLAGS_SET(flags, CHASE_STEP | CHASE_OPEN))
+ return -EINVAL;
+
+ if (isempty(path))
+ return -EINVAL;
+
+ /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following
+ * symlinks relative to a root directory, instead of the root of the host.
+ *
+ * Note that "root" primarily matters if we encounter an absolute symlink. It is also used when following
+ * relative symlinks to ensure they cannot be used to "escape" the root directory. The path parameter passed is
+ * assumed to be already prefixed by it, except if the CHASE_PREFIX_ROOT flag is set, in which case it is first
+ * prefixed accordingly.
+ *
+ * Algorithmically this operates on two path buffers: "done" are the components of the path we already
+ * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to
+ * process. On each iteration, we move one component from "todo" to "done", processing its special meaning
+ * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no
+ * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races
+ * at a minimum.
+ *
+ * Suggested usage: whenever you want to canonicalize a path, use this function. Pass the absolute path you got
+ * as-is: fully qualified and relative to your host's root. Optionally, specify the root parameter to tell this
+ * function what to do when encountering a symlink with an absolute path as directory: prefix it by the
+ * specified path.
+ *
+ * There are five ways to invoke this function:
+ *
+ * 1. Without CHASE_STEP or CHASE_OPEN: in this case the path is resolved and the normalized path is returned
+ * in `ret`. The return value is < 0 on error. If CHASE_NONEXISTENT is also set 0 is returned if the file
+ * doesn't exist, > 0 otherwise. If CHASE_NONEXISTENT is not set >= 0 is returned if the destination was
+ * found, -ENOENT if it doesn't.
+ *
+ * 2. With CHASE_OPEN: in this case the destination is opened after chasing it as O_PATH and this file
+ * descriptor is returned as return value. This is useful to open files relative to some root
+ * directory. Note that the returned O_PATH file descriptors must be converted into a regular one (using
+ * fd_reopen() or such) before it can be used for reading/writing. CHASE_OPEN may not be combined with
+ * CHASE_NONEXISTENT.
+ *
+ * 3. With CHASE_STEP: in this case only a single step of the normalization is executed, i.e. only the first
+ * symlink or ".." component of the path is resolved, and the resulting path is returned. This is useful if
+ * a caller wants to trace a path through the file system verbosely. Returns < 0 on error, > 0 if the
+ * path is fully normalized, and == 0 for each normalization step. This may be combined with
+ * CHASE_NONEXISTENT, in which case 1 is returned when a component is not found.
+ *
+ * 4. With CHASE_SAFE: in this case the path must not contain unsafe transitions, i.e. transitions from
+ * unprivileged to privileged files or directories. In such cases the return value is -ENOLINK. If
+ * CHASE_WARN is also set a warning describing the unsafe transition is emitted.
+ *
+ * 5. With CHASE_NO_AUTOFS: in this case if an autofs mount point is encountered, the path normalization is
+ * aborted and -EREMOTE is returned. If CHASE_WARN is also set a warning showing the path of the mount point
+ * is emitted.
+ *
+ * */
+
+ /* A root directory of "/" or "" is identical to none */
+ if (empty_or_root(original_root))
+ original_root = NULL;
+
+ if (!original_root && !ret && (flags & (CHASE_NONEXISTENT|CHASE_NO_AUTOFS|CHASE_SAFE|CHASE_OPEN|CHASE_STEP)) == CHASE_OPEN) {
+ /* Shortcut the CHASE_OPEN case if the caller isn't interested in the actual path and has no root set
+ * and doesn't care about any of the other special features we provide either. */
+ r = open(path, O_PATH|O_CLOEXEC|((flags & CHASE_NOFOLLOW) ? O_NOFOLLOW : 0));
+ if (r < 0)
+ return -errno;
+
+ return r;
+ }
+
+ if (original_root) {
+ r = path_make_absolute_cwd(original_root, &root);
+ if (r < 0)
+ return r;
+
+ if (flags & CHASE_PREFIX_ROOT) {
+
+ /* We don't support relative paths in combination with a root directory */
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ path = prefix_roota(root, path);
+ }
+ }
+
+ r = path_make_absolute_cwd(path, &buffer);
+ if (r < 0)
+ return r;
+
+ fd = open("/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (fd < 0)
+ return -errno;
+
+ if (flags & CHASE_SAFE) {
+ if (fstat(fd, &previous_stat) < 0)
+ return -errno;
+ }
+
+ todo = buffer;
+ for (;;) {
+ _cleanup_free_ char *first = NULL;
+ _cleanup_close_ int child = -1;
+ struct stat st;
+ size_t n, m;
+
+ /* Determine length of first component in the path */
+ n = strspn(todo, "/"); /* The slashes */
+ m = n + strcspn(todo + n, "/"); /* The entire length of the component */
+
+ /* Extract the first component. */
+ first = strndup(todo, m);
+ if (!first)
+ return -ENOMEM;
+
+ todo += m;
+
+ /* Empty? Then we reached the end. */
+ if (isempty(first))
+ break;
+
+ /* Just a single slash? Then we reached the end. */
+ if (path_equal(first, "/")) {
+ /* Preserve the trailing slash */
+
+ if (flags & CHASE_TRAIL_SLASH)
+ if (!strextend(&done, "/", NULL))
+ return -ENOMEM;
+
+ break;
+ }
+
+ /* Just a dot? Then let's eat this up. */
+ if (path_equal(first, "/."))
+ continue;
+
+ /* Two dots? Then chop off the last bit of what we already found out. */
+ if (path_equal(first, "/..")) {
+ _cleanup_free_ char *parent = NULL;
+ _cleanup_close_ int fd_parent = -1;
+
+ /* If we already are at the top, then going up will not change anything. This is in-line with
+ * how the kernel handles this. */
+ if (empty_or_root(done))
+ continue;
+
+ parent = dirname_malloc(done);
+ if (!parent)
+ return -ENOMEM;
+
+ /* Don't allow this to leave the root dir. */
+ if (root &&
+ path_startswith(done, root) &&
+ !path_startswith(parent, root))
+ continue;
+
+ free_and_replace(done, parent);
+
+ if (flags & CHASE_STEP)
+ goto chased_one;
+
+ fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (fd_parent < 0)
+ return -errno;
+
+ if (flags & CHASE_SAFE) {
+ if (fstat(fd_parent, &st) < 0)
+ return -errno;
+
+ if (unsafe_transition(&previous_stat, &st))
+ return log_unsafe_transition(fd, fd_parent, path, flags);
+
+ previous_stat = st;
+ }
+
+ safe_close(fd);
+ fd = TAKE_FD(fd_parent);
+
+ continue;
+ }
+
+ /* Otherwise let's see what this is. */
+ child = openat(fd, first + n, O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (child < 0) {
+
+ if (errno == ENOENT &&
+ (flags & CHASE_NONEXISTENT) &&
+ (isempty(todo) || path_is_normalized(todo))) {
+
+ /* If CHASE_NONEXISTENT is set, and the path does not exist, then that's OK, return
+ * what we got so far. But don't allow this if the remaining path contains "../" or "./"
+ * or something else weird. */
+
+ /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
+ if (streq_ptr(done, "/"))
+ *done = '\0';
+
+ if (!strextend(&done, first, todo, NULL))
+ return -ENOMEM;
+
+ exists = false;
+ break;
+ }
+
+ return -errno;
+ }
+
+ if (fstat(child, &st) < 0)
+ return -errno;
+ if ((flags & CHASE_SAFE) &&
+ unsafe_transition(&previous_stat, &st))
+ return log_unsafe_transition(fd, child, path, flags);
+
+ previous_stat = st;
+
+ if ((flags & CHASE_NO_AUTOFS) &&
+ fd_is_fs_type(child, AUTOFS_SUPER_MAGIC) > 0)
+ return log_autofs_mount_point(child, path, flags);
+
+ if (S_ISLNK(st.st_mode) && !((flags & CHASE_NOFOLLOW) && isempty(todo))) {
+ char *joined;
+
+ _cleanup_free_ char *destination = NULL;
+
+ /* This is a symlink, in this case read the destination. But let's make sure we don't follow
+ * symlinks without bounds. */
+ if (--max_follow <= 0)
+ return -ELOOP;
+
+ r = readlinkat_malloc(fd, first + n, &destination);
+ if (r < 0)
+ return r;
+ if (isempty(destination))
+ return -EINVAL;
+
+ if (path_is_absolute(destination)) {
+
+ /* An absolute destination. Start the loop from the beginning, but use the root
+ * directory as base. */
+
+ safe_close(fd);
+ fd = open(root ?: "/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (fd < 0)
+ return -errno;
+
+ if (flags & CHASE_SAFE) {
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (unsafe_transition(&previous_stat, &st))
+ return log_unsafe_transition(child, fd, path, flags);
+
+ previous_stat = st;
+ }
+
+ free(done);
+
+ /* Note that we do not revalidate the root, we take it as is. */
+ if (isempty(root))
+ done = NULL;
+ else {
+ done = strdup(root);
+ if (!done)
+ return -ENOMEM;
+ }
+
+ /* Prefix what's left to do with what we just read, and start the loop again, but
+ * remain in the current directory. */
+ joined = strjoin(destination, todo);
+ } else
+ joined = strjoin("/", destination, todo);
+ if (!joined)
+ return -ENOMEM;
+
+ free(buffer);
+ todo = buffer = joined;
+
+ if (flags & CHASE_STEP)
+ goto chased_one;
+
+ continue;
+ }
+
+ /* If this is not a symlink, then let's just add the name we read to what we already verified. */
+ if (!done)
+ done = TAKE_PTR(first);
+ else {
+ /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
+ if (streq(done, "/"))
+ *done = '\0';
+
+ if (!strextend(&done, first, NULL))
+ return -ENOMEM;
+ }
+
+ /* And iterate again, but go one directory further down. */
+ safe_close(fd);
+ fd = TAKE_FD(child);
+ }
+
+ if (!done) {
+ /* Special case, turn the empty string into "/", to indicate the root directory. */
+ done = strdup("/");
+ if (!done)
+ return -ENOMEM;
+ }
+
+ if (ret)
+ *ret = TAKE_PTR(done);
+
+ if (flags & CHASE_OPEN) {
+ /* Return the O_PATH fd we currently are looking to the caller. It can translate it to a proper fd by
+ * opening /proc/self/fd/xyz. */
+
+ assert(fd >= 0);
+ return TAKE_FD(fd);
+ }
+
+ if (flags & CHASE_STEP)
+ return 1;
+
+ return exists;
+
+chased_one:
+ if (ret) {
+ char *c;
+
+ c = strjoin(strempty(done), todo);
+ if (!c)
+ return -ENOMEM;
+
+ *ret = c;
+ }
+
+ return 0;
+}
+
+/* Resolves 'path' (optionally relative to 'root') via chase_symlinks() and opens the result with
+ * 'open_flags'.
+ *
+ * Returns the new file descriptor (>= 0) on success, or a negative errno-style value on failure.
+ * CHASE_NONEXISTENT is refused with -EINVAL. If 'ret_path' is non-NULL the resolved path is stored there
+ * on success; it is moved out of the auto-freed local, so the caller becomes responsible for it. */
+int chase_symlinks_and_open(
+                const char *path,
+                const char *root,
+                unsigned chase_flags,
+                int open_flags,
+                char **ret_path) {
+
+        _cleanup_close_ int path_fd = -1;
+        _cleanup_free_ char *p = NULL;
+        int r;
+
+        if (chase_flags & CHASE_NONEXISTENT)
+                return -EINVAL;
+
+        if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
+                /* Shortcut this call if none of the special features of this call are requested. The
+                 * shortcut must still honour CHASE_NOFOLLOW, exactly like the equivalent shortcut in
+                 * chase_symlinks() does, otherwise a trailing symlink would silently be followed. */
+                r = open(path, open_flags | ((chase_flags & CHASE_NOFOLLOW) ? O_NOFOLLOW : 0));
+                if (r < 0)
+                        return -errno;
+
+                return r;
+        }
+
+        /* Slow path: let chase_symlinks() hand us an O_PATH fd, then convert it into a regular fd. */
+        path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL);
+        if (path_fd < 0)
+                return path_fd;
+
+        r = fd_reopen(path_fd, open_flags);
+        if (r < 0)
+                return r;
+
+        if (ret_path)
+                *ret_path = TAKE_PTR(p);
+
+        return r;
+}
+
+/* Resolves 'path' (optionally relative to 'root') via chase_symlinks() and opens it as a directory stream.
+ *
+ * Returns 0 on success and stores the stream in '*ret_dir'; returns a negative errno-style value on
+ * failure. A NULL 'ret_dir' or CHASE_NONEXISTENT yields -EINVAL. If 'ret_path' is non-NULL it receives
+ * the resolved path on success. */
+int chase_symlinks_and_opendir(
+                const char *path,
+                const char *root,
+                unsigned chase_flags,
+                char **ret_path,
+                DIR **ret_dir) {
+
+        char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+        _cleanup_close_ int chased_fd = -1;
+        _cleanup_free_ char *resolved = NULL;
+        DIR *dir;
+
+        if (!ret_dir || (chase_flags & CHASE_NONEXISTENT))
+                return -EINVAL;
+
+        if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0)
+                /* None of the special features are requested: a plain opendir() is all we need. */
+                dir = opendir(path);
+        else {
+                chased_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &resolved : NULL);
+                if (chased_fd < 0)
+                        return chased_fd;
+
+                /* Turn the O_PATH fd into a directory stream by going through its /proc entry. */
+                xsprintf(procfs_path, "/proc/self/fd/%i", chased_fd);
+                dir = opendir(procfs_path);
+        }
+        if (!dir)
+                return -errno;
+
+        if (ret_path)
+                *ret_path = TAKE_PTR(resolved);
+
+        *ret_dir = dir;
+        return 0;
+}
+
+/* Resolves 'path' (optionally relative to 'root') via chase_symlinks() and stats the result.
+ *
+ * On success '*ret_stat' is filled in and, if 'ret_path' is non-NULL, the resolved path is stored there.
+ * Returns 1 on success, unless the caller passed CHASE_OPEN, in which case the O_PATH file descriptor of
+ * the destination is returned instead. Returns a negative errno-style value on failure; CHASE_NONEXISTENT
+ * is refused with -EINVAL. */
+int chase_symlinks_and_stat(
+                const char *path,
+                const char *root,
+                unsigned chase_flags,
+                char **ret_path,
+                struct stat *ret_stat) {
+
+        _cleanup_close_ int path_fd = -1;
+        _cleanup_free_ char *p = NULL;
+
+        assert(path);
+        assert(ret_stat);
+
+        if (chase_flags & CHASE_NONEXISTENT)
+                return -EINVAL;
+
+        /* The shortcut cannot hand out a file descriptor, hence it must not be taken when the caller asked
+         * for one with CHASE_OPEN: returning 1 there would be mistaken for fd 1. */
+        if (empty_or_root(root) && !ret_path &&
+            (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE|CHASE_OPEN)) == 0) {
+                /* Shortcut this call if none of the special features of this call are requested. Honour
+                 * CHASE_NOFOLLOW so that a trailing symlink is reported as such, matching what the slow
+                 * path below does when it fstat()s the O_PATH fd of the symlink. */
+                if (fstatat(AT_FDCWD, path, ret_stat,
+                            (chase_flags & CHASE_NOFOLLOW) ? AT_SYMLINK_NOFOLLOW : 0) < 0)
+                        return -errno;
+
+                return 1;
+        }
+
+        path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL);
+        if (path_fd < 0)
+                return path_fd;
+
+        if (fstat(path_fd, ret_stat) < 0)
+                return -errno;
+
+        if (ret_path)
+                *ret_path = TAKE_PTR(p);
+
+        if (chase_flags & CHASE_OPEN)
+                return TAKE_FD(path_fd);
+
+        return 1;
+}
+
+/* Like access(), but checks an already open file descriptor by going through its /proc/self/fd/ entry.
+ * Returns 0 if the access check passes, a negative errno-style value otherwise. */
+int access_fd(int fd, int mode) {
+        char proc_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(fd) + 1];
+
+        xsprintf(proc_path, "/proc/self/fd/%i", fd);
+
+        return access(proc_path, mode) < 0 ? -errno : 0;
+}
+
+/* Removes the temporary file named by the array '*p', unless the name still ends in the unreplaced
+ * ".XXXXXX" template suffix. */
+void unlink_tempfilep(char (*p)[]) {
+        /* mkstemp() (almost always) replaces the suffix when it creates the file, so an untouched suffix
+         * is taken to mean that nothing was created. The rare case where the original suffix is actually
+         * used, and any unlink failure, are deliberately ignored. */
+        if (endswith(*p, ".XXXXXX"))
+                return;
+
+        (void) unlink_noerrno(*p);
+}
+
+/* Returns 0 once the unlink itself succeeded (deallocation is best-effort and its failures are only logged),
+ * or a negative errno-style value if unlinkat() failed or the preceding open failed with ENOENT/EISDIR. */
+int unlinkat_deallocate(int fd, const char *name, int flags) {
+ _cleanup_close_ int truncate_fd = -1;
+ struct stat st;
+ off_t l, bs;
+
+ /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other
+ * link to it. This is useful to ensure that other processes that might have the file open for reading won't be
+ * able to keep the data pinned on disk forever. This call is particular useful whenever we execute clean-up
+ * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and
+ * returned to the free pool.
+ *
+ * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means
+ * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other
+ * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes
+ * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.)
+ * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file
+ * truncation (🔪), as our goal of deallocating the data space trumps our goal of being nice to readers (💐).
+ *
+ * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the
+ * primary job – to delete the file – is accomplished. */
+
+ /* Grab a writable fd before unlinking, so that the contents can still be released once the name is gone.
+ * This is skipped entirely when a directory is to be removed. */
+ if ((flags & AT_REMOVEDIR) == 0) {
+ truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
+ if (truncate_fd < 0) {
+
+ /* If this failed because the file doesn't exist propagate the error right-away. Also,
+ * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is
+ * returned when this is a directory but we are not supposed to delete those, hence propagate
+ * the error right-away too. */
+ if (IN_SET(errno, ENOENT, EISDIR))
+ return -errno;
+
+ if (errno != ELOOP) /* don't complain if this is a symlink */
+ log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name);
+ }
+ }
+
+ if (unlinkat(fd, name, flags) < 0)
+ return -errno;
+
+ if (truncate_fd < 0) /* Don't have a file handle, can't do more ☹️ */
+ return 0;
+
+ if (fstat(truncate_fd, &st) < 0) {
+ log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
+ return 0;
+ }
+
+ /* Nothing to release for non-regular files, files without allocated blocks, or files that still have
+ * other links after our unlink. */
+ if (!S_ISREG(st.st_mode) || st.st_blocks == 0 || st.st_nlink > 0)
+ return 0;
+
+ /* If this is a regular file, it actually took up space on disk and there are no other links it's time to
+ * punch-hole/truncate this to release the disk space. */
+
+ /* Use st_blksize as rounding granularity, but never less than 512 bytes. */
+ bs = MAX(st.st_blksize, 512);
+ l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */
+
+ if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0)
+ return 0; /* Successfully punched a hole! 😊 */
+
+ /* Fall back to truncation */
+ if (ftruncate(truncate_fd, 0) < 0) {
+ log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m");
+ return 0;
+ }
+
+ return 0;
+}
+
+/* Syncs the directory that contains the regular file referred to by 'fd'.
+ *
+ * Returns 0 on success, and also when the file's path cannot be determined with -EOPNOTSUPP. Any other
+ * failure is returned as a negative errno-style value; a non-absolute path yields -EINVAL. */
+int fsync_directory_of_file(int fd) {
+        _cleanup_free_ char *file_path = NULL;
+        _cleanup_close_ int parent_fd = -1;
+        int r;
+
+        r = fd_verify_regular(fd);
+        if (r < 0)
+                return r;
+
+        r = fd_get_path(fd, &file_path);
+        if (r < 0) {
+                /* If /proc is not available, we're most likely running in some chroot environment, and
+                 * syncing the directory is not very important in that case. Let's do nothing then. */
+                bool ignore = r == -EOPNOTSUPP;
+
+                log_debug_errno(r, "Failed to query /proc/self/fd/%d%s: %m",
+                                fd,
+                                ignore ? ", ignoring" : "");
+
+                return ignore ? 0 : r;
+        }
+
+        if (!path_is_absolute(file_path))
+                return -EINVAL;
+
+        parent_fd = open_parent(file_path, O_CLOEXEC, 0);
+        if (parent_fd < 0)
+                return parent_fd;
+
+        return fsync(parent_fd) < 0 ? -errno : 0;
+}
+
+/* fsync()s the inode that 'path' names relative to 'at_fd'. With an empty or NULL 'path' the object
+ * synced is 'at_fd' itself, or the current working directory if 'at_fd' is AT_FDCWD.
+ * Returns 0 on success, a negative errno-style value on failure. */
+int fsync_path_at(int at_fd, const char *path) {
+        _cleanup_close_ int opened_fd = -1;
+        int fd = at_fd; /* default: sync the fd we were handed, nothing to open */
+
+        if (!isempty(path)) {
+                opened_fd = openat(at_fd, path, O_RDONLY|O_CLOEXEC);
+                if (opened_fd < 0)
+                        return -errno;
+
+                fd = opened_fd;
+
+        } else if (at_fd == AT_FDCWD) {
+                /* AT_FDCWD is not a real fd, so open the current directory explicitly. */
+                opened_fd = open(".", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+                if (opened_fd < 0)
+                        return -errno;
+
+                fd = opened_fd;
+        }
+
+        return fsync(fd) < 0 ? -errno : 0;
+}
+
+/* Opens the parent directory of 'path' with the given open() 'flags' and 'mode'.
+ * Returns the file descriptor on success, -EINVAL for an empty path or the root directory, -ENOMEM on
+ * allocation failure, or the negated errno of open(). */
+int open_parent(const char *path, int flags, mode_t mode) {
+        _cleanup_free_ char *dir = NULL;
+        int fd;
+
+        /* Requesting the parent of the root dir is fishy, let's prohibit that, same as an empty path. */
+        if (isempty(path) || path_equal(path, "/"))
+                return -EINVAL;
+
+        dir = dirname_malloc(path);
+        if (!dir)
+                return -ENOMEM;
+
+        /* Let's insist on O_DIRECTORY since the parent of a file or directory is a directory. The exception
+         * is O_TMPFILE, because in that case we actually create a regular file below the parent directory. */
+        if ((flags & O_PATH) != O_PATH) {
+                if ((flags & O_TMPFILE) != O_TMPFILE)
+                        flags |= O_DIRECTORY|O_RDONLY;
+        } else
+                flags |= O_DIRECTORY;
+
+        fd = open(dir, flags, mode);
+
+        return fd < 0 ? -errno : fd;
+}
diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h
new file mode 100644
index 0000000..7ad030b
--- /dev/null
+++ b/src/basic/fs-util.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/inotify.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "time-util.h"
+#include "util.h"
+
+int unlink_noerrno(const char *path);
+
+int rmdir_parents(const char *path, const char *stop);
+
+int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath);
+
+int readlinkat_malloc(int fd, const char *p, char **ret);
+int readlink_malloc(const char *p, char **r);
+int readlink_value(const char *p, char **ret);
+int readlink_and_make_absolute(const char *p, char **r);
+
+int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid);
+int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid);
+
+int fchmod_umask(int fd, mode_t mode);
+int fchmod_opath(int fd, mode_t m);
+
+int fd_warn_permissions(const char *path, int fd);
+
+/* access()-style check relative to the current working directory that passes AT_SYMLINK_NOFOLLOW, i.e. a
+ * trailing symlink is checked itself instead of being followed. Returns what faccessat() returns. */
+#define laccess(path, mode) faccessat(AT_FDCWD, (path), (mode), AT_SYMLINK_NOFOLLOW)
+
+int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode);
+int touch(const char *path);
+
+int symlink_idempotent(const char *from, const char *to, bool make_relative);
+
+int symlink_atomic(const char *from, const char *to);
+int mknod_atomic(const char *path, mode_t mode, dev_t dev);
+int mkfifo_atomic(const char *path, mode_t mode);
+int mkfifoat_atomic(int dir_fd, const char *path, mode_t mode);
+
+int get_files_in_directory(const char *path, char ***list);
+
+int tmp_dir(const char **ret);
+int var_tmp_dir(const char **ret);
+
+int unlink_or_warn(const char *filename);
+
+/* Buffer size for one inotify event: the fixed header plus room for a NAME_MAX-byte name and one more byte. */
+#define INOTIFY_EVENT_MAX (sizeof(struct inotify_event) + NAME_MAX + 1)
+
+/* Iterates 'e' over every struct inotify_event stored in the first 'sz' bytes of 'buffer', which must be an
+ * object with 'ev' and 'raw' members (such as union inotify_event_buffer). Each step advances by the fixed
+ * header size plus the event's variable-length name ('len'). The 'buffer' argument is parenthesized on
+ * every use so that expressions such as *ptr expand correctly. */
+#define FOREACH_INOTIFY_EVENT(e, buffer, sz) \
+        for ((e) = &(buffer).ev;                                \
+             (uint8_t*) (e) < (uint8_t*) ((buffer).raw) + (sz); \
+             (e) = (struct inotify_event*) ((uint8_t*) (e) + sizeof(struct inotify_event) + (e)->len))
+
+/* Buffer for reading inotify events: 'raw' provides INOTIFY_EVENT_MAX bytes of storage, 'ev' overlays the
+ * first event on the same memory. This is the shape FOREACH_INOTIFY_EVENT() expects. */
+union inotify_event_buffer {
+ struct inotify_event ev;
+ uint8_t raw[INOTIFY_EVENT_MAX];
+};
+
+int inotify_add_watch_fd(int fd, int what, uint32_t mask);
+
+/* Flags for chase_symlinks() and the chase_symlinks_and_*() wrappers; each is a distinct bit and they may be ORed together. */
+enum {
+ CHASE_PREFIX_ROOT = 1 << 0, /* If set, the specified path will be prefixed by the specified root before beginning the iteration */
+ CHASE_NONEXISTENT = 1 << 1, /* If set, it's OK if the path doesn't actually exist. */
+ CHASE_NO_AUTOFS = 1 << 2, /* If set, return -EREMOTE if autofs mount point found */
+ CHASE_SAFE = 1 << 3, /* If set, return -ENOLINK if we ever traverse from unprivileged to privileged files or directories */
+ CHASE_OPEN = 1 << 4, /* If set, return an O_PATH object to the final component */
+ CHASE_TRAIL_SLASH = 1 << 5, /* If set, any trailing slash will be preserved */
+ CHASE_STEP = 1 << 6, /* If set, just execute a single step of the normalization */
+ CHASE_NOFOLLOW = 1 << 7, /* Only valid with CHASE_OPEN: when the path's right-most component refers to symlink return O_PATH fd of the symlink, rather than following it. */
+ CHASE_WARN = 1 << 8, /* Emit an appropriate warning when an error is encountered */
+};
+
+/* How many iterations to execute before returning -ELOOP */
+#define CHASE_SYMLINKS_MAX 32
+
+int chase_symlinks(const char *path_with_prefix, const char *root, unsigned flags, char **ret);
+
+int chase_symlinks_and_open(const char *path, const char *root, unsigned chase_flags, int open_flags, char **ret_path);
+int chase_symlinks_and_opendir(const char *path, const char *root, unsigned chase_flags, char **ret_path, DIR **ret_dir);
+int chase_symlinks_and_stat(const char *path, const char *root, unsigned chase_flags, char **ret_path, struct stat *ret_stat);
+
+/* Useful for usage with _cleanup_(), removes a directory and frees the pointer */
+static inline void rmdir_and_free(char *p) {
+ /* NOTE(review): PROTECT_ERRNO presumably saves errno and restores it when the scope is left, so that a
+ * failing rmdir() does not clobber the caller's errno — confirm against its definition. */
+ PROTECT_ERRNO;
+ (void) rmdir(p); /* best-effort: the result is deliberately ignored */
+ free(p);
+}
+/* Generates the pointer-taking cleanup variant of rmdir_and_free() for use with _cleanup_(). */
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rmdir_and_free);
+
+/* Counterpart of rmdir_and_free() for files: unlinks the path 'p' and frees the string. The unlink result is
+ * deliberately ignored. */
+static inline void unlink_and_free(char *p) {
+ (void) unlink_noerrno(p);
+ free(p);
+}
+/* Generates the pointer-taking cleanup variant of unlink_and_free() for use with _cleanup_(). */
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, unlink_and_free);
+
+int access_fd(int fd, int mode);
+
+void unlink_tempfilep(char (*p)[]);
+int unlinkat_deallocate(int fd, const char *name, int flags);
+
+int fsync_directory_of_file(int fd);
+int fsync_path_at(int at_fd, const char *path);
+
+int open_parent(const char *path, int flags, mode_t mode);
diff --git a/src/basic/gcrypt-util.c b/src/basic/gcrypt-util.c
new file mode 100644
index 0000000..f304a2b
--- /dev/null
+++ b/src/basic/gcrypt-util.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_GCRYPT
+#include <gcrypt.h>
+
+#include "gcrypt-util.h"
+#include "hexdecoct.h"
+
+/* Initializes libgcrypt once. If the library already reports its initialization as finished this is a
+ * no-op. Unless 'secmem' is true, libgcrypt's secure memory is disabled. */
+void initialize_libgcrypt(bool secmem) {
+        const char *version;
+
+        if (gcry_control(GCRYCTL_INITIALIZATION_FINISHED_P))
+                return; /* somebody already did the work */
+
+        version = gcry_check_version("1.4.5");
+        assert(version);
+
+        /* Turn off "secmem". Clients which wish to make use of this feature should initialize the
+         * library manually */
+        if (!secmem)
+                gcry_control(GCRYCTL_DISABLE_SECMEM);
+
+        gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0);
+}
+
+/* Hashes the 'len' bytes at 's' with the libgcrypt digest 'md_algorithm' and stores the result, encoded
+ * by hexmem() into a newly allocated string, in '*out'.
+ * Returns 0 on success, -EIO if the digest cannot be opened or read, -ENOMEM if encoding fails. */
+int string_hashsum(const char *s, size_t len, int md_algorithm, char **out) {
+        _cleanup_(gcry_md_closep) gcry_md_hd_t md = NULL;
+        size_t digest_len;
+        void *digest;
+        char *encoded;
+
+        initialize_libgcrypt(false);
+
+        digest_len = gcry_md_get_algo_dlen(md_algorithm);
+        assert(digest_len > 0);
+
+        /* The return value is not inspected; a handle that is still NULL signals failure. */
+        gcry_md_open(&md, md_algorithm, 0);
+        if (!md)
+                return -EIO;
+
+        gcry_md_write(md, s, len);
+
+        digest = gcry_md_read(md, 0);
+        if (!digest)
+                return -EIO;
+
+        encoded = hexmem(digest, digest_len);
+        if (!encoded)
+                return -ENOMEM;
+
+        *out = encoded;
+        return 0;
+}
+#endif
diff --git a/src/basic/gcrypt-util.h b/src/basic/gcrypt-util.h
new file mode 100644
index 0000000..87eb606
--- /dev/null
+++ b/src/basic/gcrypt-util.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#pragma once
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#if HAVE_GCRYPT
+#include <gcrypt.h>
+
+#include "macro.h"
+
+void initialize_libgcrypt(bool secmem);
+int string_hashsum(const char *s, size_t len, int md_algorithm, char **out);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(gcry_md_hd_t, gcry_md_close);
+#endif
+
+/* SHA-224 convenience wrapper around string_hashsum(). When built without gcrypt support (HAVE_GCRYPT unset
+ * or 0) this always fails with -EOPNOTSUPP and does not touch 'out'. */
+static inline int string_hashsum_sha224(const char *s, size_t len, char **out) {
+#if HAVE_GCRYPT
+ return string_hashsum(s, len, GCRY_MD_SHA224, out);
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+/* SHA-256 convenience wrapper around string_hashsum(). When built without gcrypt support (HAVE_GCRYPT unset
+ * or 0) this always fails with -EOPNOTSUPP and does not touch 'out'. */
+static inline int string_hashsum_sha256(const char *s, size_t len, char **out) {
+#if HAVE_GCRYPT
+ return string_hashsum(s, len, GCRY_MD_SHA256, out);
+#else
+ return -EOPNOTSUPP;
+#endif
+}
diff --git a/src/basic/generate-af-list.sh b/src/basic/generate-af-list.sh
new file mode 100755
index 0000000..5bf244c
--- /dev/null
+++ b/src/basic/generate-af-list.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -eu
+
+$1 -E -dM -include sys/socket.h -include "$2" -include "$3" - </dev/null | \
+ grep -Ev 'AF_UNSPEC|AF_MAX' | \
+ awk '/^#define[ \t]+AF_[^ \t]+[ \t]+[AP]F_[^ \t]/ { print $2; }'
diff --git a/src/basic/generate-arphrd-list.sh b/src/basic/generate-arphrd-list.sh
new file mode 100755
index 0000000..e6e874a
--- /dev/null
+++ b/src/basic/generate-arphrd-list.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include net/if_arp.h -include "$2" -include "$3" - </dev/null | \
+ awk '/^#define[ \t]+ARPHRD_[^ \t]+[ \t]+[^ \t]/ { print $2; }' | \
+ sed -e 's/ARPHRD_//'
diff --git a/src/basic/generate-cap-list.sh b/src/basic/generate-cap-list.sh
new file mode 100755
index 0000000..0628d24
--- /dev/null
+++ b/src/basic/generate-cap-list.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include linux/capability.h -include "$2" -include "$3" - </dev/null | \
+ awk '/^#define[ \t]+CAP_[A-Z_]+[ \t]+/ { print $2; }' | \
+ grep -v CAP_LAST_CAP
diff --git a/src/basic/generate-errno-list.sh b/src/basic/generate-errno-list.sh
new file mode 100755
index 0000000..953d5e3
--- /dev/null
+++ b/src/basic/generate-errno-list.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include errno.h - </dev/null | \
+ awk '/^#define[ \t]+E[^ _]+[ \t]+/ { print $2; }'
diff --git a/src/basic/glob-util.c b/src/basic/glob-util.c
new file mode 100644
index 0000000..9fac676
--- /dev/null
+++ b/src/basic/glob-util.c
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <glob.h>
+#include <sys/types.h>
+
+#include "dirent-util.h"
+#include "glob-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "strv.h"
+
+/* Adapter with the void-returning, void*-taking signature needed for glob_t's gl_closedir hook (see
+ * safe_glob()); the result of closedir() is discarded. */
+static void closedir_wrapper(void* v) {
+ (void) closedir(v);
+}
+
+/* Wrapper around glob() that always passes GLOB_ALTDIRFUNC and fills in any directory/stat callback of
+ * 'pglob' the caller left unset (readdir is replaced by readdir_no_dot).
+ *
+ * Returns 0 if at least one path matched, -ENOENT if nothing matched, -ENOMEM if glob() ran out of
+ * memory, and otherwise -errno (or -EIO if errno was not set). 'flags' must not contain GLOB_ALTDIRFUNC. */
+int safe_glob(const char *path, int flags, glob_t *pglob) {
+        int rc;
+
+        /* We want to set GLOB_ALTDIRFUNC ourselves, don't allow it to be set. */
+        assert(!(flags & GLOB_ALTDIRFUNC));
+
+        /* Only install defaults for the hooks the caller did not provide. */
+        if (!pglob->gl_opendir)
+                pglob->gl_opendir = (void *(*)(const char *)) opendir;
+        if (!pglob->gl_readdir)
+                pglob->gl_readdir = (struct dirent *(*)(void *)) readdir_no_dot;
+        if (!pglob->gl_closedir)
+                pglob->gl_closedir = closedir_wrapper;
+        if (!pglob->gl_stat)
+                pglob->gl_stat = stat;
+        if (!pglob->gl_lstat)
+                pglob->gl_lstat = lstat;
+
+        /* Reset errno so that we can tell afterwards whether glob() left an error code behind. */
+        errno = 0;
+        rc = glob(path, flags | GLOB_ALTDIRFUNC, NULL, pglob);
+
+        switch (rc) {
+
+        case 0:
+                /* Success, but treat an empty result list like "no match". */
+                return strv_isempty(pglob->gl_pathv) ? -ENOENT : 0;
+
+        case GLOB_NOMATCH:
+                return -ENOENT;
+
+        case GLOB_NOSPACE:
+                return -ENOMEM;
+
+        default:
+                return errno > 0 ? -errno : -EIO;
+        }
+}
+
+/* Checks whether the glob pattern 'path' matches anything.
+ * Returns true (1) on a match, false (0) if there is none, or a negative errno-style value on error. */
+int glob_exists(const char *path) {
+        _cleanup_globfree_ glob_t g = {};
+        int r;
+
+        assert(path);
+
+        r = safe_glob(path, GLOB_NOSORT|GLOB_BRACE, &g);
+        if (r >= 0)
+                return true;
+
+        /* "Nothing matched" is an answer, not an error. */
+        return r == -ENOENT ? false : r;
+}
+
+/* Expands the glob pattern 'path' and hands all matches to strv_extend_strv() to be added to '*strv'.
+ * Returns a negative errno-style value if globbing fails (including -ENOENT when nothing matches),
+ * otherwise whatever strv_extend_strv() returns. */
+int glob_extend(char ***strv, const char *path) {
+        _cleanup_globfree_ glob_t g = {};
+        int r;
+
+        r = safe_glob(path, GLOB_NOSORT|GLOB_BRACE, &g);
+
+        return r < 0 ? r : strv_extend_strv(strv, g.gl_pathv, false);
+}
diff --git a/src/basic/glob-util.h b/src/basic/glob-util.h
new file mode 100644
index 0000000..8e226c1
--- /dev/null
+++ b/src/basic/glob-util.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <glob.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "macro.h"
+#include "string-util.h"
+
+/* Note: this function modifies pglob to set various functions. */
+int safe_glob(const char *path, int flags, glob_t *pglob);
+
+int glob_exists(const char *path);
+int glob_extend(char ***strv, const char *path);
+
+#define _cleanup_globfree_ _cleanup_(globfree)
+
+/* Returns true if 'p' contains at least one of the characters listed in GLOB_CHARS. 'p' is handed straight to
+ * strpbrk(), so it must be a valid NUL-terminated string. */
+_pure_ static inline bool string_is_glob(const char *p) {
+ /* Check if a string contains any glob patterns. */
+ return !!strpbrk(p, GLOB_CHARS);
+}
diff --git a/src/basic/gunicode.c b/src/basic/gunicode.c
new file mode 100644
index 0000000..c51b1a7
--- /dev/null
+++ b/src/basic/gunicode.c
@@ -0,0 +1,110 @@
+/* gunicode.c - Unicode manipulation functions
+ *
+ * Copyright (C) 1999, 2000 Tom Tromey
+ * Copyright © 2000, 2005 Red Hat, Inc.
+ */
+
+#include "gunicode.h"
+
+#define unichar uint32_t
+
+/**
+ * g_utf8_prev_char:
+ * @p: a pointer to a position within a UTF-8 encoded string
+ *
+ * Finds the previous UTF-8 character in the string before @p.
+ *
+ * @p does not have to be at the beginning of a UTF-8 character. No check
+ * is made to see if the character found is actually valid other than
+ * it starts with an appropriate byte. If @p might be the first
+ * character of the string, you must use g_utf8_find_prev_char() instead.
+ *
+ * Return value: a pointer to the found character.
+ **/
+char *
+utf8_prev_char (const char *p)
+{
+  /* Step back at least one byte, then keep going for as long as we are
+   * looking at a continuation byte (bit pattern 10xxxxxx). There is no
+   * bounds check: the caller guarantees a character starts before @p. */
+  do
+    p--;
+  while ((*p & 0xc0) == 0x80);
+
+  return (char *)p;
+}
+
+/* A range of code points; interval_compare() treats both 'start' and 'end' as inclusive bounds. */
+struct Interval
+{
+ unichar start, end;
+};
+
+/* bsearch() comparator: @key is not a pointer to data but the code point
+ * itself, smuggled through the pointer value; @elt points to a struct
+ * Interval. Returns -1/+1 if the code point lies below/above the
+ * interval and 0 if it falls inside (bounds inclusive). */
+static int
+interval_compare (const void *key, const void *elt)
+{
+  const struct Interval *range = elt;
+  unichar c = (unichar) (long) (key);
+
+  if (c < range->start)
+    return -1;
+
+  return c > range->end ? +1 : 0;
+}
+
+/*
+ * NOTE:
+ *
+ * The tables for g_unichar_iswide() and g_unichar_iswide_cjk() are
+ * generated from the Unicode Character Database's file
+ * extracted/DerivedEastAsianWidth.txt using the gen-iswide-table.py
+ * in this way:
+ *
+ * ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt
+ *
+ * Last update for Unicode 6.0.
+ */
+
+/**
+ * g_unichar_iswide:
+ * @c: a Unicode character
+ *
+ * Determines if a character is typically rendered in a double-width
+ * cell.
+ *
+ * Return value: %TRUE if the character is wide
+ **/
+bool
+unichar_iswide (unichar c)
+{
+  /* Sorted, non-overlapping inclusive ranges, as required for the binary
+   * search below. See NOTE earlier for how to update this table. */
+  static const struct Interval wide[] = {
+    {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3},
+    {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096},
+    {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA},
+    {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE},
+    {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C},
+    {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
+    {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6},
+    {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A},
+    {0x1F240, 0x1F248}, {0x1F250, 0x1F251},
+    {0x1F300, 0x1F567}, /* Miscellaneous Symbols and Pictographs */
+    {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD},
+  };
+
+  /* The code point travels as the key pointer's value, which is what
+   * interval_compare() expects. A hit means the character is wide. */
+  return bsearch ((void *)(uintptr_t)c, wide, (sizeof (wide) / sizeof ((wide)[0])), sizeof wide[0],
+                  interval_compare) != NULL;
+}
+
+/* Lookup table indexed by a byte value: the number of bytes to advance when a character starts with that byte
+ * (this is what the utf8_next_char() macro adds to its pointer). Bytes 0x00-0xBF map to 1, 0xC0-0xDF to 2,
+ * 0xE0-0xEF to 3, 0xF0-0xF7 to 4, 0xF8-0xFB to 5, 0xFC-0xFD to 6 and 0xFE-0xFF to 1. */
+const char utf8_skip_data[256] = {
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
+};
diff --git a/src/basic/gunicode.h b/src/basic/gunicode.h
new file mode 100644
index 0000000..a16b7b6
--- /dev/null
+++ b/src/basic/gunicode.h
@@ -0,0 +1,30 @@
+#pragma once
+
+/* gunicode.h - Unicode manipulation functions
+ *
+ * Copyright (C) 1999, 2000 Tom Tromey
+ * Copyright © 2000, 2005 Red Hat, Inc.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+char *utf8_prev_char (const char *p);
+
+extern const char utf8_skip_data[256];
+
+/**
+ * g_utf8_next_char:
+ * @p: Pointer to the start of a valid UTF-8 character
+ *
+ * Skips to the next character in a UTF-8 string. The string must be
+ * valid; this macro is as fast as possible, and has no error-checking.
+ * You would use this macro to iterate over a string character by
+ * character. The macro returns the start of the next UTF-8 character.
+ * Before using this macro, use g_utf8_validate() to validate strings
+ * that may contain invalid UTF-8.
+ */
+#define utf8_next_char(p) (char *)((p) + utf8_skip_data[*(const unsigned char *)(p)])
+
+bool unichar_iswide (uint32_t c);
diff --git a/src/basic/hash-funcs.c b/src/basic/hash-funcs.c
new file mode 100644
index 0000000..1be43d4
--- /dev/null
+++ b/src/basic/hash-funcs.c
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+
+#include "hash-funcs.h"
+#include "path-util.h"
+
+/* Feeds the string p into the hash state, terminating NUL byte included. */
+void string_hash_func(const char *p, struct siphash *state) {
+        const size_t size_with_nul = strlen(p) + 1;
+
+        siphash24_compress(p, size_with_nul, state);
+}
+
+/* Hash operations for NUL-terminated string keys. */
+DEFINE_HASH_OPS(string_hash_ops, char, string_hash_func, string_compare_func);
+
+/* Feeds the path q into the hash state in a normalized form: every run of slashes counts as a single
+ * "/", and trailing slashes are not hashed at all. */
+void path_hash_func(const char *q, struct siphash *state) {
+        size_t len;
+
+        assert(q);
+        assert(state);
+
+        /* Calculates a hash for a path in such a way that duplicate inner slashes don't make a difference,
+         * and neither does the presence of a trailing slash. This fits well with the semantics of
+         * path_compare(), which does similar checks and also doesn't care for trailing slashes. Note that
+         * relative and absolute paths (i.e. those which begin in a slash or not) will hash differently
+         * though. */
+
+        /* Eat up initial slashes, and add one "/" to the hash for all of them */
+        len = strspn(q, "/");
+        if (len > 0) {
+                siphash24_compress(q, 1, state);
+                q += len;
+        }
+
+        /* One iteration per path component; a component of length zero means we reached the end. */
+        while ((len = strcspn(q, "/")) > 0) {
+
+                /* Add this component to the hash and skip over it */
+                siphash24_compress(q, len, state);
+                q += len;
+
+                /* How many slashes follow this component? If nothing but slashes is left they are
+                 * trailing ones, and we don't care about them anymore. */
+                len = strspn(q, "/");
+                if (q[len] == 0)
+                        break;
+
+                /* We are not at the end yet. Hash exactly one slash for all of the ones we just encountered. */
+                siphash24_compress(q, 1, state);
+                q += len;
+        }
+}
+
+/* Key comparison for path hash maps: forwards to path_compare(). */
+int path_compare_func(const char *a, const char *b) {
+ return path_compare(a, b);
+}
+
+/* Hash operations for path keys, pairing path_hash_func() with path_compare_func(). */
+DEFINE_HASH_OPS(path_hash_ops, char, path_hash_func, path_compare_func);
+
+/* Hashes the pointer value p itself (the sizeof(p) bytes stored at &p); the pointer is not dereferenced. */
+void trivial_hash_func(const void *p, struct siphash *state) {
+ siphash24_compress(&p, sizeof(p), state);
+}
+
+/* Orders two keys by comparing the pointer values a and b with CMP(), without dereferencing them. */
+int trivial_compare_func(const void *a, const void *b) {
+ return CMP(a, b);
+}
+
+/* Hash operations that use the key pointer itself as the key. */
+const struct hash_ops trivial_hash_ops = {
+ .hash = trivial_hash_func,
+ .compare = trivial_compare_func,
+};
+
+/* Hashes the sizeof(uint64_t) bytes p points to. */
+void uint64_hash_func(const uint64_t *p, struct siphash *state) {
+ siphash24_compress(p, sizeof(uint64_t), state);
+}
+
+/* Compares the two uint64_t values a and b point to with CMP(). */
+int uint64_compare_func(const uint64_t *a, const uint64_t *b) {
+ return CMP(*a, *b);
+}
+
+/* Hash operations for keys that are pointers to uint64_t values. */
+DEFINE_HASH_OPS(uint64_hash_ops, uint64_t, uint64_hash_func, uint64_compare_func);
+
+#if SIZEOF_DEV_T != 8
+/* Hashes the sizeof(dev_t) bytes p points to. Only built when dev_t is not 8 bytes wide. */
+void devt_hash_func(const dev_t *p, struct siphash *state) {
+ siphash24_compress(p, sizeof(dev_t), state);
+}
+
+/* Compares the two dev_t values a and b point to with CMP(). */
+int devt_compare_func(const dev_t *a, const dev_t *b) {
+ return CMP(*a, *b);
+}
+
+/* Hash operations for keys that are pointers to dev_t values. */
+DEFINE_HASH_OPS(devt_hash_ops, dev_t, devt_hash_func, devt_compare_func);
+#endif
diff --git a/src/basic/hash-funcs.h b/src/basic/hash-funcs.h
new file mode 100644
index 0000000..3d2ae4b
--- /dev/null
+++ b/src/basic/hash-funcs.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "siphash24.h"
+
+/* Generic signature of a hash function: feeds the key p into the siphash state. */
+typedef void (*hash_func_t)(const void *p, struct siphash *state);
+/* Generic signature of a key comparison function. */
+typedef int (*compare_func_t)(const void *a, const void *b);
+
+/* Bundle of the per-key-type operations a hash table works with. */
+struct hash_ops {
+ hash_func_t hash; /* hashes a key */
+ compare_func_t compare; /* compares two keys */
+ free_func_t free_key; /* optional key destructor; NULL in ops defined without one */
+ free_func_t free_value; /* optional value destructor; NULL in ops defined without one */
+};
+
+/* Defines a struct hash_ops object called "name" with linkage "scope". The two unused static function
+ * pointers are initialized from hash_func and compare_func with pointer types spelled in terms of "type",
+ * so that the compiler checks the functions' signatures before they get cast to the generic hash_func_t
+ * and compare_func_t types. "uq" makes the helper identifiers unique. */
+#define _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, free_key_func, free_value_func, scope) \
+ _unused_ static void (* UNIQ_T(static_hash_wrapper, uq))(const type *, struct siphash *) = hash_func; \
+ _unused_ static int (* UNIQ_T(static_compare_wrapper, uq))(const type *, const type *) = compare_func; \
+ scope const struct hash_ops name = { \
+ .hash = (hash_func_t) hash_func, \
+ .compare = (compare_func_t) compare_func, \
+ .free_key = free_key_func, \
+ .free_value = free_value_func, \
+ }
+
+/* Defines a static function taking a void* argument that converts it to "type *" and passes it on to
+ * func. The function's name is derived from wrapper_name and uq via UNIQ_T(). */
+#define _DEFINE_FREE_FUNC(uq, type, wrapper_name, func) \
+ /* Type-safe free function */ \
+ static void UNIQ_T(wrapper_name, uq)(void *a) { \
+ type *_a = a; \
+ func(_a); \
+ }
+
+/* Like _DEFINE_HASH_OPS(), but additionally generates a void* wrapper around free_func and installs it
+ * as .free_key; .free_value stays NULL. */
+#define _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(uq, name, type, hash_func, compare_func, free_func, scope) \
+ _DEFINE_FREE_FUNC(uq, type, static_free_wrapper, free_func); \
+ _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \
+ UNIQ_T(static_free_wrapper, uq), NULL, scope)
+
+/* Like _DEFINE_HASH_OPS(), but additionally generates a void* wrapper around free_func (which takes a
+ * "type_value *") and installs it as .free_value; .free_key stays NULL. */
+#define _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(uq, name, type, hash_func, compare_func, type_value, free_func, scope) \
+ _DEFINE_FREE_FUNC(uq, type_value, static_free_wrapper, free_func); \
+ _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \
+ NULL, UNIQ_T(static_free_wrapper, uq), scope)
+
+/* Like _DEFINE_HASH_OPS(), but generates wrappers for both a key destructor and a value destructor and
+ * installs them as .free_key and .free_value. */
+#define _DEFINE_HASH_OPS_FULL(uq, name, type, hash_func, compare_func, free_key_func, type_value, free_value_func, scope) \
+ _DEFINE_FREE_FUNC(uq, type, static_free_key_wrapper, free_key_func); \
+ _DEFINE_FREE_FUNC(uq, type_value, static_free_value_wrapper, free_value_func); \
+ _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \
+ UNIQ_T(static_free_key_wrapper, uq), \
+ UNIQ_T(static_free_value_wrapper, uq), scope)
+
+/* Public front-ends of the _DEFINE_HASH_OPS*() macros. Each one passes UNIQ as the uniqueness token. The
+ * plain variants pass an empty "scope" argument, the PRIVATE variants pass "static". */
+
+/* No destructors. */
+#define DEFINE_HASH_OPS(name, type, hash_func, compare_func) \
+ _DEFINE_HASH_OPS(UNIQ, name, type, hash_func, compare_func, NULL, NULL,)
+
+#define DEFINE_PRIVATE_HASH_OPS(name, type, hash_func, compare_func) \
+ _DEFINE_HASH_OPS(UNIQ, name, type, hash_func, compare_func, NULL, NULL, static)
+
+/* With a key destructor. */
+#define DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(name, type, hash_func, compare_func, free_func) \
+ _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, free_func,)
+
+#define DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(name, type, hash_func, compare_func, free_func) \
+ _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, free_func, static)
+
+/* With a value destructor. */
+#define DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(name, type, hash_func, compare_func, value_type, free_func) \
+ _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, value_type, free_func,)
+
+#define DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(name, type, hash_func, compare_func, value_type, free_func) \
+ _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, value_type, free_func, static)
+
+/* With both a key and a value destructor. */
+#define DEFINE_HASH_OPS_FULL(name, type, hash_func, compare_func, free_key_func, value_type, free_value_func) \
+ _DEFINE_HASH_OPS_FULL(UNIQ, name, type, hash_func, compare_func, free_key_func, value_type, free_value_func,)
+
+#define DEFINE_PRIVATE_HASH_OPS_FULL(name, type, hash_func, compare_func, free_key_func, value_type, free_value_func) \
+ _DEFINE_HASH_OPS_FULL(UNIQ, name, type, hash_func, compare_func, free_key_func, value_type, free_value_func, static)
+
+/* Hash operations for NUL-terminated strings; keys are compared with strcmp(). */
+void string_hash_func(const char *p, struct siphash *state);
+#define string_compare_func strcmp
+extern const struct hash_ops string_hash_ops;
+
+/* Hash operations for file system paths. */
+void path_hash_func(const char *p, struct siphash *state);
+int path_compare_func(const char *a, const char *b) _pure_;
+extern const struct hash_ops path_hash_ops;
+
+/* This will compare the passed pointers directly, and will not dereference them. This is hence not useful for strings
+ * or suchlike. */
+void trivial_hash_func(const void *p, struct siphash *state);
+int trivial_compare_func(const void *a, const void *b) _const_;
+extern const struct hash_ops trivial_hash_ops;
+
+/* 32bit values we can always just embed in the pointer itself, but in order to support 32bit archs we need store 64bit
+ * values indirectly, since they don't fit in a pointer. */
+void uint64_hash_func(const uint64_t *p, struct siphash *state);
+int uint64_compare_func(const uint64_t *a, const uint64_t *b) _pure_;
+extern const struct hash_ops uint64_hash_ops;
+
+/* On some archs dev_t is 32bit, and on others 64bit. And sometimes it's 64bit on 32bit archs, and sometimes 32bit on
+ * 64bit archs. Yuck! When dev_t is 8 bytes wide the uint64_t operations are reused under the devt_* names,
+ * otherwise dedicated dev_t operations are declared. */
+#if SIZEOF_DEV_T != 8
+/* Deliberately not _pure_: the function returns void and its whole effect is updating *state, so marking it
+ * pure would permit the compiler to drop calls to it. This matches uint64_hash_func(). */
+void devt_hash_func(const dev_t *p, struct siphash *state);
+int devt_compare_func(const dev_t *a, const dev_t *b) _pure_;
+extern const struct hash_ops devt_hash_ops;
+#else
+#define devt_hash_func uint64_hash_func
+#define devt_compare_func uint64_compare_func
+#define devt_hash_ops uint64_hash_ops
+#endif
diff --git a/src/basic/hashmap.c b/src/basic/hashmap.c
new file mode 100644
index 0000000..66e9e00
--- /dev/null
+++ b/src/basic/hashmap.c
@@ -0,0 +1,1911 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "mempool.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "set.h"
+#include "siphash24.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+#if ENABLE_DEBUG_HASHMAP
+#include <pthread.h>
+#include "list.h"
+#endif
+
+/*
+ * Implementation of hashmaps.
+ * Addressing: open
+ * - uses less RAM compared to closed addressing (chaining), because
+ * our entries are small (especially in Sets, which tend to contain
+ * the majority of entries in systemd).
+ * Collision resolution: Robin Hood
+ * - tends to equalize displacement of entries from their optimal buckets.
+ * Probe sequence: linear
+ * - though theoretically worse than random probing/uniform hashing/double
+ * hashing, it is good for cache locality.
+ *
+ * References:
+ * Celis, P. 1986. Robin Hood Hashing.
+ * Ph.D. Dissertation. University of Waterloo, Waterloo, Ont., Canada, Canada.
+ * https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
+ * - The results are derived for random probing. Suggests deletion with
+ * tombstones and two mean-centered search methods. None of that works
+ * well for linear probing.
+ *
+ * Janson, S. 2005. Individual displacements for linear probing hashing with different insertion policies.
+ * ACM Trans. Algorithms 1, 2 (October 2005), 177-213.
+ * DOI=10.1145/1103963.1103964 http://doi.acm.org/10.1145/1103963.1103964
+ * http://www.math.uu.se/~svante/papers/sj157.pdf
+ * - Applies to Robin Hood with linear probing. Contains remarks on
+ * the unsuitability of mean-centered search with linear probing.
+ *
+ * Viola, A. 2005. Exact distribution of individual displacements in linear probing hashing.
+ * ACM Trans. Algorithms 1, 2 (October 2005), 214-242.
+ * DOI=10.1145/1103963.1103965 http://doi.acm.org/10.1145/1103963.1103965
+ * - Similar to Janson. Note that Viola writes about C_{m,n} (number of probes
+ * in a successful search), and Janson writes about displacement. C = d + 1.
+ *
+ * Goossaert, E. 2013. Robin Hood hashing: backward shift deletion.
+ * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
+ * - Explanation of backward shift deletion with pictures.
+ *
+ * Khuong, P. 2013. The Other Robin Hood Hashing.
+ * http://www.pvk.ca/Blog/2013/11/26/the-other-robin-hood-hashing/
+ * - Short summary of random vs. linear probing, and tombstones vs. backward shift.
+ */
+
+/*
+ * XXX Ideas for improvement:
+ * For unordered hashmaps, randomize iteration order, similarly to Perl:
+ * http://blog.booking.com/hardening-perls-hash-function.html
+ */
+
+/* INV_KEEP_FREE = 1 / (1 - max_load_factor)
+ * e.g. 1 / (1 - 0.8) = 5 ... keep one fifth of the buckets free. */
+#define INV_KEEP_FREE 5U
+
+/* Fields common to entries of all hashmap/set types */
+struct hashmap_base_entry {
+ const void *key;
+};
+
+/* Entry types for specific hashmap/set types
+ * hashmap_base_entry must be at the beginning of each entry struct. */
+
+/* Entry of a plain hashmap: key plus value pointer. */
+struct plain_hashmap_entry {
+ struct hashmap_base_entry b;
+ void *value;
+};
+
+/* Entry of an ordered hashmap: a plain entry plus the bucket indexes of its neighbours in the
+ * iteration list (IDX_NIL if there is none). */
+struct ordered_hashmap_entry {
+ struct plain_hashmap_entry p;
+ unsigned iterate_next, iterate_previous;
+};
+
+/* Entry of a set: only the key is stored; entry_value() hands out the key as the value. */
+struct set_entry {
+ struct hashmap_base_entry b;
+};
+
+/* In several functions it is advantageous to have the hash table extended
+ * virtually by a couple of additional buckets. We reserve special index values
+ * for these "swap" buckets. */
+#define _IDX_SWAP_BEGIN (UINT_MAX - 3)
+#define IDX_PUT (_IDX_SWAP_BEGIN + 0)
+#define IDX_TMP (_IDX_SWAP_BEGIN + 1)
+#define _IDX_SWAP_END (_IDX_SWAP_BEGIN + 2)
+
+#define IDX_FIRST (UINT_MAX - 1) /* special index for freshly initialized iterators */
+#define IDX_NIL UINT_MAX /* special index value meaning "none" or "end" */
+
+/* The swap index range ends exactly where IDX_FIRST begins, and IDX_FIRST has to agree with the
+ * _IDX_ITERATOR_FIRST constant that is defined outside of this file. */
+assert_cc(IDX_FIRST == _IDX_SWAP_END);
+assert_cc(IDX_FIRST == _IDX_ITERATOR_FIRST);
+
+/* Storage space for the "swap" buckets.
+ * All entry types can fit into an ordered_hashmap_entry.
+ * There is one element per index in [_IDX_SWAP_BEGIN, _IDX_SWAP_END). */
+struct swap_entries {
+ struct ordered_hashmap_entry e[_IDX_SWAP_END - _IDX_SWAP_BEGIN];
+};
+
+/* Distance from Initial Bucket */
+typedef uint8_t dib_raw_t;
+#define DIB_RAW_OVERFLOW ((dib_raw_t)0xfdU) /* indicates DIB value is greater than representable */
+#define DIB_RAW_REHASH ((dib_raw_t)0xfeU) /* entry yet to be rehashed during in-place resize */
+#define DIB_RAW_FREE ((dib_raw_t)0xffU) /* a free bucket */
+#define DIB_RAW_INIT ((char)DIB_RAW_FREE) /* a byte to memset a DIB store with when initializing */
+
+#define DIB_FREE UINT_MAX
+
+#if ENABLE_DEBUG_HASHMAP
+struct hashmap_debug_info {
+ LIST_FIELDS(struct hashmap_debug_info, debug_list);
+ unsigned max_entries; /* high watermark of n_entries */
+
+ /* who allocated this hashmap */
+ int line;
+ const char *file;
+ const char *func;
+
+ /* fields to detect modification while iterating */
+ unsigned put_count; /* counts puts into the hashmap */
+ unsigned rem_count; /* counts removals from hashmap */
+ unsigned last_rem_idx; /* remembers last removal index */
+};
+
+/* Tracks all existing hashmaps. Get at it from gdb. See sd_dump_hashmaps.py */
+static LIST_HEAD(struct hashmap_debug_info, hashmap_debug_list);
+static pthread_mutex_t hashmap_debug_list_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+#define HASHMAP_DEBUG_FIELDS struct hashmap_debug_info debug;
+
+#else /* !ENABLE_DEBUG_HASHMAP */
+#define HASHMAP_DEBUG_FIELDS
+#endif /* ENABLE_DEBUG_HASHMAP */
+
+/* The kinds of hash tables implemented here; used as index into hashmap_type_info[]. */
+enum HashmapType {
+ HASHMAP_TYPE_PLAIN, /* struct Hashmap, plain_hashmap_entry */
+ HASHMAP_TYPE_ORDERED, /* struct OrderedHashmap, ordered_hashmap_entry */
+ HASHMAP_TYPE_SET, /* struct Set, set_entry */
+ _HASHMAP_TYPE_MAX
+};
+
+struct _packed_ indirect_storage {
+ void *storage; /* where buckets and DIBs are stored */
+ uint8_t hash_key[HASH_KEY_SIZE]; /* hash key; changes during resize */
+
+ unsigned n_entries; /* number of stored entries */
+ unsigned n_buckets; /* number of buckets */
+
+ unsigned idx_lowest_entry; /* Index below which all buckets are free.
+ Makes "while(hashmap_steal_first())" loops
+ O(n) instead of O(n^2) for unordered hashmaps. */
+ uint8_t _pad[3]; /* padding for the whole HashmapBase */
+ /* The bitfields in HashmapBase complete the alignment of the whole thing. */
+};
+
+struct direct_storage {
+ /* This gives us 39 bytes on 64bit, or 35 bytes on 32bit.
+ * That's room for 4 set_entries + 4 DIB bytes + 3 unused bytes on 64bit,
+ * or 7 set_entries + 7 DIB bytes + 0 unused bytes on 32bit. */
+ uint8_t storage[sizeof(struct indirect_storage)];
+};
+
+#define DIRECT_BUCKETS(entry_t) \
+ (sizeof(struct direct_storage) / (sizeof(entry_t) + sizeof(dib_raw_t)))
+
+/* We should be able to store at least one entry directly. */
+assert_cc(DIRECT_BUCKETS(struct ordered_hashmap_entry) >= 1);
+
+/* We have 3 bits for n_direct_entries. */
+assert_cc(DIRECT_BUCKETS(struct set_entry) < (1 << 3));
+
+/* Hashmaps with directly stored entries all use this shared hash key.
+ * It's no big deal if the key is guessed, because there can be only
+ * a handful of directly stored entries in a hashmap. When a hashmap
+ * outgrows direct storage, it gets its own key for indirect storage. */
+static uint8_t shared_hash_key[HASH_KEY_SIZE];
+static bool shared_hash_key_initialized;
+
+/* Fields that all hashmap/set types must have */
+struct HashmapBase {
+ const struct hash_ops *hash_ops; /* hash and compare ops to use */
+
+ union _packed_ {
+ struct indirect_storage indirect; /* if has_indirect */
+ struct direct_storage direct; /* if !has_indirect */
+ };
+
+ enum HashmapType type:2; /* HASHMAP_TYPE_* */
+ bool has_indirect:1; /* whether indirect storage is used */
+ unsigned n_direct_entries:3; /* Number of entries in direct storage.
+ * Only valid if !has_indirect. */
+ bool from_pool:1; /* whether was allocated from mempool */
+ bool dirty:1; /* whether dirtied since last iterated_cache_get() */
+ bool cached:1; /* whether this hashmap is being cached */
+ HASHMAP_DEBUG_FIELDS /* optional hashmap_debug_info */
+};
+
+/* Specific hash types
+ * HashmapBase must be at the beginning of each hashmap struct. */
+
+struct Hashmap {
+ struct HashmapBase b;
+};
+
+struct OrderedHashmap {
+ struct HashmapBase b;
+ unsigned iterate_list_head, iterate_list_tail;
+};
+
+struct Set {
+ struct HashmapBase b;
+};
+
+/* NOTE(review): the code filling these fields is not part of this section; presumably ptr is an array of
+ * n_allocated slots of which n_populated are in use — confirm against the cache implementation. */
+typedef struct CacheMem {
+ const void **ptr;
+ size_t n_populated, n_allocated;
+ bool active:1;
+} CacheMem;
+
+/* Cache object attached to a hashmap by internal_hashmap_iterated_cache_new(). */
+struct IteratedCache {
+ HashmapBase *hashmap; /* the hashmap this cache belongs to */
+ CacheMem keys, values;
+};
+
+DEFINE_MEMPOOL(hashmap_pool, Hashmap, 8);
+DEFINE_MEMPOOL(ordered_hashmap_pool, OrderedHashmap, 8);
+/* No need for a separate Set pool */
+assert_cc(sizeof(Hashmap) == sizeof(Set));
+
+/* Static per-type parameters, looked up through hashmap_type_info[h->type]. */
+struct hashmap_type_info {
+ size_t head_size; /* size of the hashmap object itself */
+ size_t entry_size; /* size of one bucket's entry */
+ struct mempool *mempool; /* pool the hashmap object is allocated from when mempools are enabled */
+ unsigned n_direct_buckets; /* number of buckets that fit into direct storage */
+};
+
+/* Sets share the pool of plain hashmaps, as both objects have the same size. */
+static const struct hashmap_type_info hashmap_type_info[_HASHMAP_TYPE_MAX] = {
+ [HASHMAP_TYPE_PLAIN] = {
+ .head_size = sizeof(Hashmap),
+ .entry_size = sizeof(struct plain_hashmap_entry),
+ .mempool = &hashmap_pool,
+ .n_direct_buckets = DIRECT_BUCKETS(struct plain_hashmap_entry),
+ },
+ [HASHMAP_TYPE_ORDERED] = {
+ .head_size = sizeof(OrderedHashmap),
+ .entry_size = sizeof(struct ordered_hashmap_entry),
+ .mempool = &ordered_hashmap_pool,
+ .n_direct_buckets = DIRECT_BUCKETS(struct ordered_hashmap_entry),
+ },
+ [HASHMAP_TYPE_SET] = {
+ .head_size = sizeof(Set),
+ .entry_size = sizeof(struct set_entry),
+ .mempool = &hashmap_pool,
+ .n_direct_buckets = DIRECT_BUCKETS(struct set_entry),
+ },
+};
+
+#if VALGRIND
+/* Drops both hashmap mempools, to be nice to valgrind. Does nothing unless it runs on the main thread
+ * while /proc/self/status reports exactly one thread. */
+_destructor_ static void cleanup_pools(void) {
+        _cleanup_free_ char *n_threads = NULL;
+
+        /* The pool is only allocated by the main thread, but the memory can
+         * be passed to other threads. Let's clean up if we are the main thread
+         * and no other threads are live. */
+        if (!is_main_thread())
+                return;
+
+        if (get_proc_field("/proc/self/status", "Threads", WHITESPACE, &n_threads) < 0)
+                return;
+
+        if (!streq(n_threads, "1"))
+                return;
+
+        mempool_drop(&hashmap_pool);
+        mempool_drop(&ordered_hashmap_pool);
+}
+#endif
+
+/* Number of buckets: the stored count for indirect storage, the fixed per-type count otherwise. */
+static unsigned n_buckets(HashmapBase *h) {
+        if (h->has_indirect)
+                return h->indirect.n_buckets;
+
+        return hashmap_type_info[h->type].n_direct_buckets;
+}
+
+/* Number of entries currently stored, read from whichever counter matches the storage mode. */
+static unsigned n_entries(HashmapBase *h) {
+        if (h->has_indirect)
+                return h->indirect.n_entries;
+
+        return h->n_direct_entries;
+}
+
+/* Increments the entry counter that matches the storage mode. */
+static void n_entries_inc(HashmapBase *h) {
+        if (!h->has_indirect) {
+                h->n_direct_entries++;
+                return;
+        }
+
+        h->indirect.n_entries++;
+}
+
+/* Decrements the entry counter that matches the storage mode. */
+static void n_entries_dec(HashmapBase *h) {
+        if (!h->has_indirect) {
+                h->n_direct_entries--;
+                return;
+        }
+
+        h->indirect.n_entries--;
+}
+
+/* Start of the memory holding the buckets: the external block for indirect storage, the embedded
+ * byte array otherwise. */
+static void *storage_ptr(HashmapBase *h) {
+        if (h->has_indirect)
+                return h->indirect.storage;
+
+        return h->direct.storage;
+}
+
+/* The siphash key in effect: the hashmap's own key for indirect storage, the shared key otherwise. */
+static uint8_t *hash_key(HashmapBase *h) {
+        if (h->has_indirect)
+                return h->indirect.hash_key;
+
+        return shared_hash_key;
+}
+
+/* Maps the key p to its initial bucket: runs the hashmap's hash function over p, keyed with the hash key
+ * in effect, and reduces the result modulo the number of buckets. */
+static unsigned base_bucket_hash(HashmapBase *h, const void *p) {
+        struct siphash sip;
+        uint64_t digest;
+
+        siphash24_init(&sip, hash_key(h));
+        h->hash_ops->hash(p, &sip);
+        digest = siphash24_finalize(&sip);
+
+        return (unsigned) (digest % n_buckets(h));
+}
+#define bucket_hash(h, p) base_bucket_hash(HASHMAP_BASE(h), p)
+
+/* Sets the "dirty" flag of the hashmap (see the flag's description in struct HashmapBase). */
+static void base_set_dirty(HashmapBase *h) {
+ h->dirty = true;
+}
+#define hashmap_set_dirty(h) base_set_dirty(HASHMAP_BASE(h))
+
+/* Copies a hash function key into hash_key[]. A new random key is generated if none was generated yet or
+ * if reuse_is_ok is false; otherwise the most recently generated key is handed out again. */
+static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) {
+        static uint8_t cached_key[HASH_KEY_SIZE];
+        static bool have_cached_key = false;
+
+        /* In order to keep things fast we will not generate a new key
+         * each time we allocate a new hash table. Instead, we'll just
+         * reuse the most recently generated one, except if we never
+         * generated one or when we are rehashing an entire hash table
+         * because we reached a fill level */
+
+        if (!(have_cached_key && reuse_is_ok)) {
+                random_bytes(cached_key, sizeof(cached_key));
+                have_cached_key = true;
+        }
+
+        memcpy(hash_key, cached_key, sizeof(cached_key));
+}
+
+/* Address of the entry in bucket idx: entries are laid out back to back from the start of the storage. */
+static struct hashmap_base_entry *bucket_at(HashmapBase *h, unsigned idx) {
+        uint8_t *base = (uint8_t*) storage_ptr(h);
+        size_t offset = idx * hashmap_type_info[h->type].entry_size;
+
+        return (struct hashmap_base_entry*) (base + offset);
+}
+
+/* Typed front-ends of bucket_at(): each returns the entry in bucket idx, cast to the entry type of the
+ * respective hashmap type. */
+static struct plain_hashmap_entry *plain_bucket_at(Hashmap *h, unsigned idx) {
+ return (struct plain_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
+}
+
+static struct ordered_hashmap_entry *ordered_bucket_at(OrderedHashmap *h, unsigned idx) {
+ return (struct ordered_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
+}
+
+static struct set_entry *set_bucket_at(Set *h, unsigned idx) {
+ return (struct set_entry*) bucket_at(HASHMAP_BASE(h), idx);
+}
+
+/* Returns the swap bucket for the swap index idx. The index is not validated here; it is used relative to
+ * _IDX_SWAP_BEGIN. */
+static struct ordered_hashmap_entry *bucket_at_swap(struct swap_entries *swap, unsigned idx) {
+ return &swap->e[idx - _IDX_SWAP_BEGIN];
+}
+
+/* Resolves idx to a bucket pointer, for real indexes as well as for swap indexes (hence "_virtual").
+ * Indexes from _IDX_SWAP_END upwards are invalid. */
+static struct hashmap_base_entry *bucket_at_virtual(HashmapBase *h, struct swap_entries *swap,
+                                                    unsigned idx) {
+        if (idx >= _IDX_SWAP_END)
+                assert_not_reached("Invalid index");
+
+        if (idx >= _IDX_SWAP_BEGIN)
+                return &bucket_at_swap(swap, idx)->p.b;
+
+        return bucket_at(h, idx);
+}
+
+/* Start of the raw DIB array, which is stored right behind the last bucket. */
+static dib_raw_t *dib_raw_ptr(HashmapBase *h) {
+        size_t buckets_size = hashmap_type_info[h->type].entry_size * n_buckets(h);
+
+        return (dib_raw_t*) ((uint8_t*) storage_ptr(h) + buckets_size);
+}
+
+/* Number of steps needed to get from bucket "from" to bucket idx when walking forward, wrapping around
+ * at the end of the table. */
+static unsigned bucket_distance(HashmapBase *h, unsigned idx, unsigned from) {
+        if (idx < from)
+                return n_buckets(h) + idx - from;
+
+        return idx - from;
+}
+
+/* Converts the raw DIB byte of bucket idx into the real distance from the entry's initial bucket.
+ * Returns DIB_FREE for a free bucket. */
+static unsigned bucket_calculate_dib(HashmapBase *h, unsigned idx, dib_raw_t raw_dib) {
+        if (raw_dib == DIB_RAW_FREE)
+                return DIB_FREE;
+
+        /* Common case: the raw byte is the distance itself. */
+        if (_likely_(raw_dib < DIB_RAW_OVERFLOW))
+                return raw_dib;
+
+        /*
+         * Having an overflow DIB value is very unlikely. The hash function
+         * would have to be bad. For example, in a table of size 2^24 filled
+         * to load factor 0.9 the maximum observed DIB is only about 60.
+         * In theory (assuming I used Maxima correctly), for an infinite size
+         * hash table with load factor 0.8 the probability of a given entry
+         * having DIB > 40 is 1.9e-8.
+         * This returns the correct DIB value by recomputing the hash value in
+         * the unlikely case. XXX Hitting this case could be a hint to rehash.
+         */
+        return bucket_distance(h, idx, bucket_hash(h, bucket_at(h, idx)->key));
+}
+
+/* Stores the DIB of bucket idx in raw form: DIB_FREE becomes DIB_RAW_FREE, anything else is capped at
+ * DIB_RAW_OVERFLOW. */
+static void bucket_set_dib(HashmapBase *h, unsigned idx, unsigned dib) {
+        dib_raw_t *dibs = dib_raw_ptr(h);
+
+        if (dib == DIB_FREE)
+                dibs[idx] = DIB_RAW_FREE;
+        else
+                dibs[idx] = MIN(dib, DIB_RAW_OVERFLOW);
+}
+
+/* Returns the index of the first occupied bucket at or after idx, or IDX_NIL if there is none. */
+static unsigned skip_free_buckets(HashmapBase *h, unsigned idx) {
+        const dib_raw_t *dibs = dib_raw_ptr(h);
+        const unsigned end = n_buckets(h);
+
+        while (idx < end) {
+                if (dibs[idx] != DIB_RAW_FREE)
+                        return idx;
+
+                idx++;
+        }
+
+        return IDX_NIL;
+}
+
+/* Zeroes the entry stored in bucket idx and marks the bucket as free in the DIB array. */
+static void bucket_mark_free(HashmapBase *h, unsigned idx) {
+ memzero(bucket_at(h, idx), hashmap_type_info[h->type].entry_size);
+ bucket_set_dib(h, idx, DIB_FREE);
+}
+
+/* Copies the entry in bucket "from" to bucket "to"; either index may be a real one or a swap index.
+ * For ordered hashmaps the links of the neighbouring entries and the list head/tail are redirected from
+ * "from" to "to" as well. Neither the source bucket nor any DIB is modified here. */
+static void bucket_move_entry(HashmapBase *h, struct swap_entries *swap,
+ unsigned from, unsigned to) {
+ struct hashmap_base_entry *e_from, *e_to;
+
+ assert(from != to);
+
+ e_from = bucket_at_virtual(h, swap, from);
+ e_to = bucket_at_virtual(h, swap, to);
+
+ memcpy(e_to, e_from, hashmap_type_info[h->type].entry_size);
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+ struct ordered_hashmap_entry *le, *le_to;
+
+ le_to = (struct ordered_hashmap_entry*) e_to;
+
+ /* Let the successor point back to the new location */
+ if (le_to->iterate_next != IDX_NIL) {
+ le = (struct ordered_hashmap_entry*)
+ bucket_at_virtual(h, swap, le_to->iterate_next);
+ le->iterate_previous = to;
+ }
+
+ /* Let the predecessor point forward to the new location */
+ if (le_to->iterate_previous != IDX_NIL) {
+ le = (struct ordered_hashmap_entry*)
+ bucket_at_virtual(h, swap, le_to->iterate_previous);
+ le->iterate_next = to;
+ }
+
+ if (lh->iterate_list_head == from)
+ lh->iterate_list_head = to;
+ if (lh->iterate_list_tail == from)
+ lh->iterate_list_tail = to;
+ }
+}
+
+/* Index of the bucket following idx, wrapping around to 0 after the last bucket. */
+static unsigned next_idx(HashmapBase *h, unsigned idx) {
+        unsigned total = n_buckets(h);
+
+        return (idx + 1U) % total;
+}
+
+/* Index of the bucket preceding idx, wrapping around to the last bucket before bucket 0. */
+static unsigned prev_idx(HashmapBase *h, unsigned idx) {
+        unsigned total = n_buckets(h);
+
+        return (total + idx - 1U) % total;
+}
+
+/* Returns the value that belongs to entry e: the stored value pointer for plain and ordered hashmaps,
+ * the key itself for sets. */
+static void *entry_value(HashmapBase *h, struct hashmap_base_entry *e) {
+        if (h->type == HASHMAP_TYPE_SET)
+                return (void*) e->key;
+
+        if (h->type == HASHMAP_TYPE_PLAIN || h->type == HASHMAP_TYPE_ORDERED)
+                return ((struct plain_hashmap_entry*)e)->value;
+
+        assert_not_reached("Unknown hashmap type");
+}
+
+/* Removes the entry stored in bucket idx, which must be occupied, by backward shift deletion: the entries
+ * following idx up to the next bucket that is free or holds an entry with DIB 0 are moved one bucket
+ * back, with their DIB reduced by one, and the last vacated bucket is marked free. For ordered hashmaps
+ * the entry is unlinked from the iteration list first. Decrements the entry count and marks the hashmap
+ * dirty. */
+static void base_remove_entry(HashmapBase *h, unsigned idx) {
+ unsigned left, right, prev, dib;
+ dib_raw_t raw_dib, *dibs;
+
+ dibs = dib_raw_ptr(h);
+ assert(dibs[idx] != DIB_RAW_FREE);
+
+#if ENABLE_DEBUG_HASHMAP
+ h->debug.rem_count++;
+ h->debug.last_rem_idx = idx;
+#endif
+
+ left = idx;
+ /* Find the stop bucket ("right"). It is either free or has DIB == 0. */
+ for (right = next_idx(h, left); ; right = next_idx(h, right)) {
+ raw_dib = dibs[right];
+ if (IN_SET(raw_dib, 0, DIB_RAW_FREE))
+ break;
+
+ /* The buckets are not supposed to be all occupied and with DIB > 0.
+ * That would mean we could make everyone better off by shifting them
+ * backward. This scenario is impossible. */
+ assert(left != right);
+ }
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+ struct ordered_hashmap_entry *le = ordered_bucket_at(lh, idx);
+
+ /* Unlink the entry: patch the successor (or the list tail) ... */
+ if (le->iterate_next != IDX_NIL)
+ ordered_bucket_at(lh, le->iterate_next)->iterate_previous = le->iterate_previous;
+ else
+ lh->iterate_list_tail = le->iterate_previous;
+
+ /* ... and the predecessor (or the list head) */
+ if (le->iterate_previous != IDX_NIL)
+ ordered_bucket_at(lh, le->iterate_previous)->iterate_next = le->iterate_next;
+ else
+ lh->iterate_list_head = le->iterate_next;
+ }
+
+ /* Now shift all buckets in the interval (left, right) one step backwards */
+ for (prev = left, left = next_idx(h, left); left != right;
+ prev = left, left = next_idx(h, left)) {
+ dib = bucket_calculate_dib(h, left, dibs[left]);
+ assert(dib != 0);
+ /* No swap buckets are passed: left and prev both come from next_idx() */
+ bucket_move_entry(h, NULL, left, prev);
+ bucket_set_dib(h, prev, dib - 1);
+ }
+
+ /* "prev" is now the last bucket that was vacated (idx itself if nothing was shifted) */
+ bucket_mark_free(h, prev);
+ n_entries_dec(h);
+ base_set_dirty(h);
+}
+#define remove_entry(h, idx) base_remove_entry(HASHMAP_BASE(h), idx)
+
+/* Advances the iterator i over the ordered hashmap h by following the entries' iterate_next links,
+ * starting at the list head. Returns the bucket index of the current entry, or IDX_NIL when the
+ * iteration is over. i->idx and i->next_key are updated to describe the entry to visit next. */
+static unsigned hashmap_iterate_in_insertion_order(OrderedHashmap *h, Iterator *i) {
+ struct ordered_hashmap_entry *e;
+ unsigned idx;
+
+ assert(h);
+ assert(i);
+
+ if (i->idx == IDX_NIL)
+ goto at_end;
+
+ /* Fresh iterator on a hashmap without entries in the list */
+ if (i->idx == IDX_FIRST && h->iterate_list_head == IDX_NIL)
+ goto at_end;
+
+ if (i->idx == IDX_FIRST) {
+ idx = h->iterate_list_head;
+ e = ordered_bucket_at(h, idx);
+ } else {
+ idx = i->idx;
+ e = ordered_bucket_at(h, idx);
+ /*
+ * We allow removing the current entry while iterating, but removal may cause
+ * a backward shift. The next entry may thus move one bucket to the left.
+ * To detect when it happens, we remember the key pointer of the entry we were
+ * going to iterate next. If it does not match, there was a backward shift.
+ */
+ if (e->p.b.key != i->next_key) {
+ idx = prev_idx(HASHMAP_BASE(h), idx);
+ e = ordered_bucket_at(h, idx);
+ }
+ assert(e->p.b.key == i->next_key);
+ }
+
+#if ENABLE_DEBUG_HASHMAP
+ i->prev_idx = idx;
+#endif
+
+ /* Remember where to continue, and which key we expect to find there */
+ if (e->iterate_next != IDX_NIL) {
+ struct ordered_hashmap_entry *n;
+ i->idx = e->iterate_next;
+ n = ordered_bucket_at(h, i->idx);
+ i->next_key = n->p.b.key;
+ } else
+ i->idx = IDX_NIL;
+
+ return idx;
+
+at_end:
+ i->idx = IDX_NIL;
+ return IDX_NIL;
+}
+
+/* Advances the iterator i over h in bucket order. Returns the index of the current bucket, or IDX_NIL when
+ * the iteration is over. i->idx is updated to the next occupied bucket (IDX_NIL if there is none) and
+ * i->next_key to the key stored there. For indirect storage a fresh iterator also refreshes
+ * idx_lowest_entry with the position of the first occupied bucket. */
+static unsigned hashmap_iterate_in_internal_order(HashmapBase *h, Iterator *i) {
+        unsigned idx;
+
+        assert(h);
+        assert(i);
+
+        if (i->idx == IDX_NIL)
+                goto at_end;
+
+        if (i->idx == IDX_FIRST) {
+                /* fast forward to the first occupied bucket */
+                if (h->has_indirect) {
+                        i->idx = skip_free_buckets(h, h->indirect.idx_lowest_entry);
+                        h->indirect.idx_lowest_entry = i->idx;
+                } else
+                        i->idx = skip_free_buckets(h, 0);
+
+                if (i->idx == IDX_NIL)
+                        goto at_end;
+        } else {
+                struct hashmap_base_entry *e;
+
+                assert(i->idx > 0);
+
+                e = bucket_at(h, i->idx);
+                /*
+                 * We allow removing the current entry while iterating, but removal may cause
+                 * a backward shift. The next entry may thus move one bucket to the left.
+                 * To detect when it happens, we remember the key pointer of the entry we were
+                 * going to iterate next. If it does not match, there was a backward shift.
+                 */
+                if (e->key != i->next_key)
+                        e = bucket_at(h, --i->idx);
+
+                assert(e->key == i->next_key);
+        }
+
+        idx = i->idx;
+#if ENABLE_DEBUG_HASHMAP
+        i->prev_idx = idx;
+#endif
+
+        /* skip_free_buckets() itself yields IDX_NIL when no occupied bucket is left, hence only the
+         * key to expect next needs to be recorded here. */
+        i->idx = skip_free_buckets(h, i->idx + 1);
+        if (i->idx != IDX_NIL)
+                i->next_key = bucket_at(h, i->idx)->key;
+
+        return idx;
+
+at_end:
+        i->idx = IDX_NIL;
+        return IDX_NIL;
+}
+
+/* Advances the iterator i over h and returns the bucket index of the current entry, or IDX_NIL at the end.
+ * A NULL h is treated like an empty hashmap. Ordered hashmaps are iterated in insertion order, all other
+ * types in bucket order. In debug builds it is additionally asserted that nothing was added during
+ * iteration and that nothing but the current entry was removed. */
+static unsigned hashmap_iterate_entry(HashmapBase *h, Iterator *i) {
+ if (!h) {
+ i->idx = IDX_NIL;
+ return IDX_NIL;
+ }
+
+#if ENABLE_DEBUG_HASHMAP
+ if (i->idx == IDX_FIRST) {
+ /* Fresh iterator: take a snapshot of the modification counters */
+ i->put_count = h->debug.put_count;
+ i->rem_count = h->debug.rem_count;
+ } else {
+ /* While iterating, must not add any new entries */
+ assert(i->put_count == h->debug.put_count);
+ /* ... or remove entries other than the current one */
+ assert(i->rem_count == h->debug.rem_count ||
+ (i->rem_count == h->debug.rem_count - 1 &&
+ i->prev_idx == h->debug.last_rem_idx));
+ /* Reset our removals counter */
+ i->rem_count = h->debug.rem_count;
+ }
+#endif
+
+ return h->type == HASHMAP_TYPE_ORDERED ? hashmap_iterate_in_insertion_order((OrderedHashmap*) h, i)
+ : hashmap_iterate_in_internal_order(h, i);
+}
+
+/* Advances the iterator i over h by one entry. Returns true and stores the entry's value and key in
+ * *value and *key if there was another entry; returns false and stores NULL in both at the end of the
+ * iteration. Either output pointer may be NULL if the caller is not interested. */
+bool internal_hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key) {
+        const void *found_key = NULL;
+        void *found_value = NULL;
+        bool found = false;
+        unsigned idx;
+
+        idx = hashmap_iterate_entry(h, i);
+        if (idx != IDX_NIL) {
+                struct hashmap_base_entry *e = bucket_at(h, idx);
+
+                found_value = entry_value(h, e);
+                found_key = e->key;
+                found = true;
+        }
+
+        if (value)
+                *value = found_value;
+        if (key)
+                *key = found_key;
+
+        return found;
+}
+
+/* Set flavour of internal_hashmap_iterate(): reports the entry through *value only, no separate key. */
+bool set_iterate(Set *s, Iterator *i, void **value) {
+ return internal_hashmap_iterate(HASHMAP_BASE(s), i, value, NULL);
+}
+
+/* Loops over all entries of h, assigning the bucket index of each one to idx in turn. i is the Iterator
+ * variable that keeps track of the position; it is reset to ITERATOR_FIRST when the loop starts. Every
+ * macro argument is parenthesized wherever it is used. */
+#define HASHMAP_FOREACH_IDX(idx, h, i) \
+        for ((i) = ITERATOR_FIRST, (idx) = hashmap_iterate_entry((h), &(i)); \
+             (idx) != IDX_NIL; \
+             (idx) = hashmap_iterate_entry((h), &(i)))
+
+/* Allocates a zero-initialized IteratedCache for h and flags h as cached. Returns NULL if the allocation
+ * fails or if h is flagged as cached already. */
+IteratedCache *internal_hashmap_iterated_cache_new(HashmapBase *h) {
+ IteratedCache *cache;
+
+ assert(h);
+ assert(!h->cached);
+
+ /* Same condition as asserted above, handled gracefully should the assertion not fire */
+ if (h->cached)
+ return NULL;
+
+ cache = new0(IteratedCache, 1);
+ if (!cache)
+ return NULL;
+
+ cache->hashmap = h;
+ h->cached = true;
+
+ return cache;
+}
+
+/* Brings the direct storage of h into its empty state: all buckets zeroed, and the DIB bytes that follow
+ * them set to DIB_RAW_INIT. Must only be used while h has no indirect storage. */
+static void reset_direct_storage(HashmapBase *h) {
+        const struct hashmap_type_info *info = &hashmap_type_info[h->type];
+        size_t buckets_size, dibs_size;
+        void *dibs;
+
+        assert(!h->has_indirect);
+
+        buckets_size = info->entry_size * info->n_direct_buckets;
+        dibs_size = sizeof(dib_raw_t) * info->n_direct_buckets;
+
+        /* The DIB bytes are filled starting at the pointer mempset() hands back */
+        dibs = mempset(h->direct.storage, 0, buckets_size);
+        memset(dibs, DIB_RAW_INIT, dibs_size);
+}
+
+/* Allocates and initializes an empty hashmap of the given type that uses direct storage. hash_ops may be
+ * NULL, in which case trivial_hash_ops is used. The object comes from the type's mempool if
+ * mempool_enabled() says so, from malloc0() otherwise. Returns NULL if the allocation fails. */
+static struct HashmapBase *hashmap_base_new(const struct hash_ops *hash_ops, enum HashmapType type HASHMAP_DEBUG_PARAMS) {
+ HashmapBase *h;
+ const struct hashmap_type_info *hi = &hashmap_type_info[type];
+ bool up;
+
+ up = mempool_enabled();
+
+ h = up ? mempool_alloc0_tile(hi->mempool) : malloc0(hi->head_size);
+ if (!h)
+ return NULL;
+
+ h->type = type;
+ h->from_pool = up; /* remembers which allocator the object came from */
+ h->hash_ops = hash_ops ?: &trivial_hash_ops;
+
+ if (type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*)h;
+ lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
+ }
+
+ reset_direct_storage(h);
+
+ /* The key shared by all hashmaps with direct storage is generated once, on first use */
+ if (!shared_hash_key_initialized) {
+ random_bytes(shared_hash_key, sizeof(shared_hash_key));
+ shared_hash_key_initialized= true;
+ }
+
+#if ENABLE_DEBUG_HASHMAP
+ h->debug.func = func;
+ h->debug.file = file;
+ h->debug.line = line;
+ assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
+ LIST_PREPEND(debug_list, hashmap_debug_list, &h->debug);
+ assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
+#endif
+
+ return h;
+}
+
+/* Creates a new plain Hashmap (HASHMAP_TYPE_PLAIN). Returns NULL if hashmap_base_new() fails. */
+Hashmap *internal_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return (Hashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
+}
+
+/* Creates a new OrderedHashmap (HASHMAP_TYPE_ORDERED). Returns NULL if hashmap_base_new() fails. */
+OrderedHashmap *internal_ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return (OrderedHashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
+}
+
+/* Creates a new Set (HASHMAP_TYPE_SET). Returns NULL if hashmap_base_new() fails. */
+Set *internal_set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return (Set*) hashmap_base_new(hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
+}
+
+/* Makes sure '*h' points to a hashmap object, creating one of the given 'type' if it is
+ * still NULL. An existing object is left untouched (its hash_ops are not compared).
+ * Returns 0 on success, -ENOMEM if a new object could not be allocated. */
+static int hashmap_base_ensure_allocated(HashmapBase **h, const struct hash_ops *hash_ops,
+                                         enum HashmapType type HASHMAP_DEBUG_PARAMS) {
+        assert(h);
+
+        if (!*h) {
+                HashmapBase *fresh;
+
+                fresh = hashmap_base_new(hash_ops, type HASHMAP_DEBUG_PASS_ARGS);
+                if (!fresh)
+                        return -ENOMEM;
+
+                *h = fresh;
+        }
+
+        return 0;
+}
+
+/* Ensures '*h' is an allocated plain Hashmap. Returns 0 on success, -ENOMEM on allocation failure. */
+int internal_hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
+}
+
+/* Ensures '*h' is an allocated OrderedHashmap. Returns 0 on success, -ENOMEM on allocation failure. */
+int internal_ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
+}
+
+/* Ensures '*s' is an allocated Set. Returns 0 on success, -ENOMEM on allocation failure. */
+int internal_set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return hashmap_base_ensure_allocated((HashmapBase**)s, hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
+}
+
+/* Releases the hashmap object itself. The hashmap must already be empty and back on
+ * direct storage (both asserted), i.e. internal_hashmap_clear() must have run before. */
+static void hashmap_free_no_clear(HashmapBase *h) {
+ assert(!h->has_indirect);
+ assert(h->n_direct_entries == 0);
+
+#if ENABLE_DEBUG_HASHMAP
+ /* Unregister from the global debug list under its mutex. */
+ assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
+ LIST_REMOVE(debug_list, hashmap_debug_list, &h->debug);
+ assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
+#endif
+
+ /* Give the memory back to wherever hashmap_base_new() took it from. */
+ if (h->from_pool) {
+ /* Ensure that the object didn't get migrated between threads. */
+ assert_se(is_main_thread());
+ mempool_free_tile(hashmap_type_info[h->type].mempool, h);
+ } else
+ free(h);
+}
+
+/* Clears 'h' (running the destructors selected by internal_hashmap_clear()) and then
+ * releases the hashmap object. A NULL 'h' is accepted. Always returns NULL. */
+HashmapBase *internal_hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
+        if (!h)
+                return NULL;
+
+        internal_hashmap_clear(h, default_free_key, default_free_value);
+        hashmap_free_no_clear(h);
+
+        return NULL;
+}
+
+/* Removes all entries from 'h' and returns it to empty direct storage. A NULL 'h' is a no-op.
+ * Destructors configured in h->hash_ops take precedence; 'default_free_key' and
+ * 'default_free_value' are only used where hash_ops has none. Either may be NULL. */
+void internal_hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
+ free_func_t free_key, free_value;
+ if (!h)
+ return;
+
+ free_key = h->hash_ops->free_key ?: default_free_key;
+ free_value = h->hash_ops->free_value ?: default_free_value;
+
+ if (free_key || free_value) {
+
+ /* If destructor calls are defined, let's destroy things defensively: let's take the item out of the
+ * hash table, and only then call the destructor functions. If these destructors then try to unregister
+ * themselves from our hash table a second time, the entry is already gone. */
+
+ while (internal_hashmap_size(h) > 0) {
+ void *k = NULL;
+ void *v;
+
+ /* 'true' asks for the first entry to be removed from the table as it is fetched. */
+ v = internal_hashmap_first_key_and_value(h, true, &k);
+
+ if (free_key)
+ free_key(k);
+
+ if (free_value)
+ free_value(v);
+ }
+ }
+
+ /* Drop the heap-allocated bucket array, if any, and fall back to direct storage. */
+ if (h->has_indirect) {
+ free(h->indirect.storage);
+ h->has_indirect = false;
+ }
+
+ h->n_direct_entries = 0;
+ reset_direct_storage(h);
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+ lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
+ }
+
+ base_set_dirty(h);
+}
+
+static int resize_buckets(HashmapBase *h, unsigned entries_add);
+
+/*
+ * Finds an empty bucket to put an entry into, starting the scan at 'idx'.
+ * Performs Robin Hood swaps as it goes. The entry to put must be placed
+ * by the caller into swap slot IDX_PUT.
+ * If used for in-place resizing, may leave a displaced entry in swap slot
+ * IDX_PUT. Caller must rehash it next.
+ * Returns: true if it left a displaced entry to rehash next in IDX_PUT,
+ * false otherwise.
+ */
+static bool hashmap_put_robin_hood(HashmapBase *h, unsigned idx,
+ struct swap_entries *swap) {
+ dib_raw_t raw_dib, *dibs;
+ unsigned dib, distance;
+
+#if ENABLE_DEBUG_HASHMAP
+ h->debug.put_count++;
+#endif
+
+ dibs = dib_raw_ptr(h);
+
+ /* 'distance' is how far the entry currently held in IDX_PUT is from the bucket where
+ * its scan started. */
+ for (distance = 0; ; distance++) {
+ raw_dib = dibs[idx];
+ if (IN_SET(raw_dib, DIB_RAW_FREE, DIB_RAW_REHASH)) {
+ /* A bucket marked for rehashing still holds an entry: save it in IDX_TMP first. */
+ if (raw_dib == DIB_RAW_REHASH)
+ bucket_move_entry(h, swap, idx, IDX_TMP);
+
+ /* Keep the lowest-used-index hint of indirect storage up to date. */
+ if (h->has_indirect && h->indirect.idx_lowest_entry > idx)
+ h->indirect.idx_lowest_entry = idx;
+
+ bucket_set_dib(h, idx, distance);
+ bucket_move_entry(h, swap, IDX_PUT, idx);
+ if (raw_dib == DIB_RAW_REHASH) {
+ /* Hand the displaced entry back to the caller in IDX_PUT for rehashing. */
+ bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
+ return true;
+ }
+
+ return false;
+ }
+
+ dib = bucket_calculate_dib(h, idx, raw_dib);
+
+ if (dib < distance) {
+ /* Found a wealthier entry. Go Robin Hood! */
+ bucket_set_dib(h, idx, distance);
+
+ /* swap the entries */
+ bucket_move_entry(h, swap, idx, IDX_TMP);
+ bucket_move_entry(h, swap, IDX_PUT, idx);
+ bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
+
+ /* Continue the scan on behalf of the entry that was just evicted. */
+ distance = dib;
+ }
+
+ idx = next_idx(h, idx);
+ }
+}
+
+/*
+ * Puts an entry into a hashmap, boldly - no check whether key already exists.
+ * The caller must place the entry (only its key and value, not link indexes)
+ * in swap slot IDX_PUT.
+ * Caller must ensure: the key does not exist yet in the hashmap.
+ * that resize is not needed if !may_resize.
+ * Returns: 1 if entry was put successfully.
+ * -ENOMEM if may_resize==true and resize failed with -ENOMEM.
+ * Cannot return -ENOMEM if !may_resize.
+ */
+static int hashmap_base_put_boldly(HashmapBase *h, unsigned idx,
+ struct swap_entries *swap, bool may_resize) {
+ struct ordered_hashmap_entry *new_entry;
+ int r;
+
+ assert(idx < n_buckets(h));
+
+ new_entry = bucket_at_swap(swap, IDX_PUT);
+
+ if (may_resize) {
+ r = resize_buckets(h, 1);
+ if (r < 0)
+ return r;
+ /* A resize happened: the bucket count and the hash key changed, so the
+ * caller-supplied start index is stale and must be recomputed. */
+ if (r > 0)
+ idx = bucket_hash(h, new_entry->p.b.key);
+ }
+ assert(n_entries(h) < n_buckets(h));
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+
+ /* Append the new entry to the tail of the iteration list. The links refer to
+ * IDX_PUT for now. NOTE(review): presumably bucket_move_entry() fixes them up
+ * when the entry leaves the swap slot - confirm. */
+ new_entry->iterate_next = IDX_NIL;
+ new_entry->iterate_previous = lh->iterate_list_tail;
+
+ if (lh->iterate_list_tail != IDX_NIL) {
+ struct ordered_hashmap_entry *old_tail;
+
+ old_tail = ordered_bucket_at(lh, lh->iterate_list_tail);
+ assert(old_tail->iterate_next == IDX_NIL);
+ old_tail->iterate_next = IDX_PUT;
+ }
+
+ lh->iterate_list_tail = IDX_PUT;
+ if (lh->iterate_list_head == IDX_NIL)
+ lh->iterate_list_head = IDX_PUT;
+ }
+
+ /* Outside of a resize no bucket is marked DIB_RAW_REHASH, so nothing may be left over in IDX_PUT. */
+ assert_se(hashmap_put_robin_hood(h, idx, swap) == false);
+
+ n_entries_inc(h);
+#if ENABLE_DEBUG_HASHMAP
+ h->debug.max_entries = MAX(h->debug.max_entries, n_entries(h));
+#endif
+
+ base_set_dirty(h);
+
+ return 1;
+}
+/* Convenience wrapper accepting any of the concrete hashmap types. */
+#define hashmap_put_boldly(h, idx, swap, may_resize) \
+ hashmap_base_put_boldly(HASHMAP_BASE(h), idx, swap, may_resize)
+
+/*
+ * Returns 0 if resize is not needed.
+ * 1 if successfully resized.
+ * -ENOMEM on allocation failure.
+ */
+static int resize_buckets(HashmapBase *h, unsigned entries_add) {
+ struct swap_entries swap;
+ void *new_storage;
+ dib_raw_t *old_dibs, *new_dibs;
+ const struct hashmap_type_info *hi;
+ unsigned idx, optimal_idx;
+ unsigned old_n_buckets, new_n_buckets, n_rehashed, new_n_entries;
+ uint8_t new_shift;
+ bool rehash_next;
+
+ assert(h);
+
+ hi = &hashmap_type_info[h->type];
+ new_n_entries = n_entries(h) + entries_add;
+
+ /* overflow? */
+ if (_unlikely_(new_n_entries < entries_add))
+ return -ENOMEM;
+
+ /* For direct storage we allow 100% load, because it's tiny. */
+ if (!h->has_indirect && new_n_entries <= hi->n_direct_buckets)
+ return 0;
+
+ /*
+ * Load factor = n/m = 1 - (1/INV_KEEP_FREE).
+ * From it follows: m = n + n/(INV_KEEP_FREE - 1)
+ */
+ new_n_buckets = new_n_entries + new_n_entries / (INV_KEEP_FREE - 1);
+ /* overflow? */
+ if (_unlikely_(new_n_buckets < new_n_entries))
+ return -ENOMEM;
+
+ /* Refuse sizes whose byte count (entries plus DIB array) would not fit into an unsigned. */
+ if (_unlikely_(new_n_buckets > UINT_MAX / (hi->entry_size + sizeof(dib_raw_t))))
+ return -ENOMEM;
+
+ old_n_buckets = n_buckets(h);
+
+ /* The current bucket array is already large enough. */
+ if (_likely_(new_n_buckets <= old_n_buckets))
+ return 0;
+
+ /* The allocation size is 1 << new_shift bytes, covering at least the requested buckets
+ * and at least twice the size of the direct storage. */
+ new_shift = log2u_round_up(MAX(
+ new_n_buckets * (hi->entry_size + sizeof(dib_raw_t)),
+ 2 * sizeof(struct direct_storage)));
+
+ /* Realloc storage (buckets and DIB array). */
+ new_storage = realloc(h->has_indirect ? h->indirect.storage : NULL,
+ 1U << new_shift);
+ if (!new_storage)
+ return -ENOMEM;
+
+ /* Must upgrade direct to indirect storage. */
+ if (!h->has_indirect) {
+ memcpy(new_storage, h->direct.storage,
+ old_n_buckets * (hi->entry_size + sizeof(dib_raw_t)));
+ h->indirect.n_entries = h->n_direct_entries;
+ h->indirect.idx_lowest_entry = 0;
+ h->n_direct_entries = 0;
+ }
+
+ /* Get a new hash key. If we've just upgraded to indirect storage,
+ * allow reusing a previously generated key. It's still a different key
+ * from the shared one that we used for direct storage. */
+ get_hash_key(h->indirect.hash_key, !h->has_indirect);
+
+ h->has_indirect = true;
+ h->indirect.storage = new_storage;
+ /* Use every bucket that fits into the rounded-up allocation. */
+ h->indirect.n_buckets = (1U << new_shift) /
+ (hi->entry_size + sizeof(dib_raw_t));
+
+ /* The old DIB array sits right behind the old entries; the new one is located via dib_raw_ptr(). */
+ old_dibs = (dib_raw_t*)((uint8_t*) new_storage + hi->entry_size * old_n_buckets);
+ new_dibs = dib_raw_ptr(h);
+
+ /*
+ * Move the DIB array to the new place, replacing valid DIB values with
+ * DIB_RAW_REHASH to indicate all of the used buckets need rehashing.
+ * Note: Overlap is not possible, because we have at least doubled the
+ * number of buckets and dib_raw_t is smaller than any entry type.
+ */
+ for (idx = 0; idx < old_n_buckets; idx++) {
+ assert(old_dibs[idx] != DIB_RAW_REHASH);
+ new_dibs[idx] = old_dibs[idx] == DIB_RAW_FREE ? DIB_RAW_FREE
+ : DIB_RAW_REHASH;
+ }
+
+ /* Zero the area of newly added entries (including the old DIB area) */
+ memzero(bucket_at(h, old_n_buckets),
+ (n_buckets(h) - old_n_buckets) * hi->entry_size);
+
+ /* The upper half of the new DIB array needs initialization */
+ memset(&new_dibs[old_n_buckets], DIB_RAW_INIT,
+ (n_buckets(h) - old_n_buckets) * sizeof(dib_raw_t));
+
+ /* Rehash entries that need it */
+ n_rehashed = 0;
+ for (idx = 0; idx < old_n_buckets; idx++) {
+ if (new_dibs[idx] != DIB_RAW_REHASH)
+ continue;
+
+ optimal_idx = bucket_hash(h, bucket_at(h, idx)->key);
+
+ /*
+ * Not much to do if by luck the entry hashes to its current
+ * location. Just set its DIB.
+ */
+ if (optimal_idx == idx) {
+ new_dibs[idx] = 0;
+ n_rehashed++;
+ continue;
+ }
+
+ /* Lift the entry out of its bucket into IDX_PUT and mark the bucket free. */
+ new_dibs[idx] = DIB_RAW_FREE;
+ bucket_move_entry(h, &swap, idx, IDX_PUT);
+ /* bucket_move_entry does not clear the source */
+ memzero(bucket_at(h, idx), hi->entry_size);
+
+ do {
+ /*
+ * Find the new bucket for the current entry. This may make
+ * another entry homeless and load it into IDX_PUT.
+ */
+ rehash_next = hashmap_put_robin_hood(h, optimal_idx, &swap);
+ n_rehashed++;
+
+ /* Did the current entry displace another one? */
+ if (rehash_next)
+ optimal_idx = bucket_hash(h, bucket_at_swap(&swap, IDX_PUT)->p.b.key);
+ } while (rehash_next);
+ }
+
+ /* Every stored entry must have been placed exactly once. */
+ assert(n_rehashed == n_entries(h));
+
+ return 1;
+}
+
+/*
+ * Finds an entry with a matching key
+ * Returns: index of the found entry, or IDX_NIL if not found.
+ */
+static unsigned base_bucket_scan(HashmapBase *h, unsigned idx, const void *key) {
+ struct hashmap_base_entry *e;
+ unsigned dib, distance;
+ dib_raw_t *dibs = dib_raw_ptr(h);
+
+ assert(idx < n_buckets(h));
+
+ /* 'idx' is the bucket the key hashes to; 'distance' counts how far the probe has moved from it. */
+ for (distance = 0; ; distance++) {
+ /* A free bucket ends the probe sequence: the key is not stored. */
+ if (dibs[idx] == DIB_RAW_FREE)
+ return IDX_NIL;
+
+ dib = bucket_calculate_dib(h, idx, dibs[idx]);
+
+ /* The resident entry is closer to its own start bucket than we are to ours. On insertion
+ * (see hashmap_put_robin_hood()) our key would have displaced it, so it cannot be further along. */
+ if (dib < distance)
+ return IDX_NIL;
+ /* Only entries at the same distance can share our start bucket; compare keys just for those. */
+ if (dib == distance) {
+ e = bucket_at(h, idx);
+ if (h->hash_ops->compare(e->key, key) == 0)
+ return idx;
+ }
+
+ idx = next_idx(h, idx);
+ }
+}
+/* Convenience wrapper accepting any of the concrete hashmap types. */
+#define bucket_scan(h, idx, key) base_bucket_scan(HASHMAP_BASE(h), idx, key)
+
+/* Adds the mapping key -> value to 'h'.
+ * Returns: 1 if the entry was added,
+ *          0 if the key is already mapped to this very value,
+ *          -EEXIST if the key is already mapped to a different value,
+ *          -ENOMEM if growing the table failed. */
+int hashmap_put(Hashmap *h, const void *key, void *value) {
+        struct swap_entries swap;
+        struct plain_hashmap_entry *slot;
+        unsigned home, found;
+
+        assert(h);
+
+        home = bucket_hash(h, key);
+        found = bucket_scan(h, home, key);
+
+        /* Existing key: succeed only if the stored value is identical. */
+        if (found != IDX_NIL)
+                return plain_bucket_at(h, found)->value == value ? 0 : -EEXIST;
+
+        slot = &bucket_at_swap(&swap, IDX_PUT)->p;
+        slot->b.key = key;
+        slot->value = value;
+
+        return hashmap_put_boldly(h, home, &swap, true);
+}
+
+/* Adds 'key' to set 's'.
+ * Returns: 1 if the key was added, 0 if an equal key is already present,
+ *          -ENOMEM if growing the table failed. */
+int set_put(Set *s, const void *key) {
+        struct swap_entries swap;
+        unsigned home;
+
+        assert(s);
+
+        home = bucket_hash(s, key);
+        if (bucket_scan(s, home, key) != IDX_NIL)
+                return 0;
+
+        bucket_at_swap(&swap, IDX_PUT)->p.b.key = key;
+
+        return hashmap_put_boldly(s, home, &swap, true);
+}
+
+/* Maps 'key' to 'value', overwriting both the stored key pointer and the value if an equal
+ * key already exists.
+ * Returns: 0 if an existing entry was overwritten, 1 if a new entry was added,
+ * -ENOMEM if growing the table failed. */
+int hashmap_replace(Hashmap *h, const void *key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+
+ assert(h);
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx != IDX_NIL) {
+ e = plain_bucket_at(h, idx);
+#if ENABLE_DEBUG_HASHMAP
+ /* Although the key is equal, the key pointer may have changed,
+ * and this would break our assumption for iterating. So count
+ * this operation as incompatible with iteration. */
+ if (e->b.key != key) {
+ h->b.debug.put_count++;
+ h->b.debug.rem_count++;
+ h->b.debug.last_rem_idx = idx;
+ }
+#endif
+ e->b.key = key;
+ e->value = value;
+ hashmap_set_dirty(h);
+
+ return 0;
+ }
+
+ /* Key not present yet: insert it like hashmap_put() would. */
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = key;
+ e->value = value;
+ return hashmap_put_boldly(h, hash, &swap, true);
+}
+
+/* Changes the value of an existing entry; the stored key pointer is left alone.
+ * Returns 0 on success, -ENOENT if 'key' is not in the hashmap. */
+int hashmap_update(Hashmap *h, const void *key, void *value) {
+        unsigned found;
+
+        assert(h);
+
+        found = bucket_scan(h, bucket_hash(h, key), key);
+        if (found == IDX_NIL)
+                return -ENOENT;
+
+        plain_bucket_at(h, found)->value = value;
+        hashmap_set_dirty(h);
+
+        return 0;
+}
+
+/* Looks up 'key' and returns the entry's value as reported by entry_value(), or NULL if
+ * the key is not found. A NULL 'h' is treated as an empty hashmap. */
+void *internal_hashmap_get(HashmapBase *h, const void *key) {
+        unsigned found;
+
+        if (!h)
+                return NULL;
+
+        found = bucket_scan(h, bucket_hash(h, key), key);
+        if (found == IDX_NIL)
+                return NULL;
+
+        return entry_value(h, bucket_at(h, found));
+}
+
+/* Like a plain lookup, but additionally hands out the key pointer stored in the hashmap
+ * through 'key2' (if non-NULL). Returns the value, or NULL if the key is not found or 'h'
+ * is NULL; '*key2' is left untouched in those cases. */
+void *hashmap_get2(Hashmap *h, const void *key, void **key2) {
+        struct plain_hashmap_entry *hit;
+        unsigned found;
+
+        if (!h)
+                return NULL;
+
+        found = bucket_scan(h, bucket_hash(h, key), key);
+        if (found == IDX_NIL)
+                return NULL;
+
+        hit = plain_bucket_at(h, found);
+
+        if (key2)
+                *key2 = (void*) hit->b.key;
+
+        return hit->value;
+}
+
+/* Returns true if an entry with a key equal to 'key' exists. A NULL 'h' contains nothing. */
+bool internal_hashmap_contains(HashmapBase *h, const void *key) {
+        if (!h)
+                return false;
+
+        return bucket_scan(h, bucket_hash(h, key), key) != IDX_NIL;
+}
+
+/* Removes the entry for 'key' and returns what entry_value() reported for it before the
+ * removal. Returns NULL if the key is not found or 'h' is NULL. */
+void *internal_hashmap_remove(HashmapBase *h, const void *key) {
+        unsigned found;
+        void *data;
+
+        if (!h)
+                return NULL;
+
+        found = bucket_scan(h, bucket_hash(h, key), key);
+        if (found == IDX_NIL)
+                return NULL;
+
+        /* Fetch the value first; the bucket contents are gone after remove_entry(). */
+        data = entry_value(h, bucket_at(h, found));
+        remove_entry(h, found);
+
+        return data;
+}
+
+/* Removes the entry for 'key' and returns its value. If 'rkey' is non-NULL it receives the
+ * key pointer that was stored in the hashmap, or NULL when nothing was removed (key not
+ * found or 'h' is NULL), in which case NULL is returned as well. */
+void *hashmap_remove2(Hashmap *h, const void *key, void **rkey) {
+        if (h) {
+                unsigned found;
+
+                found = bucket_scan(h, bucket_hash(h, key), key);
+                if (found != IDX_NIL) {
+                        struct plain_hashmap_entry *hit;
+                        void *data;
+
+                        hit = plain_bucket_at(h, found);
+                        data = hit->value;
+                        if (rkey)
+                                *rkey = (void*) hit->b.key;
+
+                        remove_entry(h, found);
+
+                        return data;
+                }
+        }
+
+        /* Nothing removed. */
+        if (rkey)
+                *rkey = NULL;
+
+        return NULL;
+}
+
+/* Replaces the entry for 'old_key' by a new entry new_key -> value.
+ * Returns: 0 on success,
+ * -ENOENT if 'old_key' is not in the hashmap (or 'h' is NULL),
+ * -EEXIST if 'new_key' is already present; nothing is modified in that case. */
+int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned old_hash, new_hash, idx;
+
+ if (!h)
+ return -ENOENT;
+
+ old_hash = bucket_hash(h, old_key);
+ idx = bucket_scan(h, old_hash, old_key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ new_hash = bucket_hash(h, new_key);
+ if (bucket_scan(h, new_hash, new_key) != IDX_NIL)
+ return -EEXIST;
+
+ remove_entry(h, idx);
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = new_key;
+ e->value = value;
+ /* The removal above freed a bucket, so the insertion is done without resizing and cannot fail. */
+ assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
+
+ return 0;
+}
+
+/* Replaces 'old_key' in set 's' by 'new_key'.
+ * Returns: 0 on success,
+ * -ENOENT if 'old_key' is not in the set (or 's' is NULL),
+ * -EEXIST if 'new_key' is already present; nothing is modified in that case. */
+int set_remove_and_put(Set *s, const void *old_key, const void *new_key) {
+ struct swap_entries swap;
+ struct hashmap_base_entry *e;
+ unsigned old_hash, new_hash, idx;
+
+ if (!s)
+ return -ENOENT;
+
+ old_hash = bucket_hash(s, old_key);
+ idx = bucket_scan(s, old_hash, old_key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ new_hash = bucket_hash(s, new_key);
+ if (bucket_scan(s, new_hash, new_key) != IDX_NIL)
+ return -EEXIST;
+
+ remove_entry(s, idx);
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ e->key = new_key;
+ /* The removal above freed a bucket, so the insertion is done without resizing and cannot fail. */
+ assert_se(hashmap_put_boldly(s, new_hash, &swap, false) == 1);
+
+ return 0;
+}
+
+/* Removes the entry for 'old_key' and maps 'new_key' to 'value', dropping any other entry
+ * that already exists for 'new_key'.
+ * Returns 0 on success, -ENOENT if 'old_key' is not in the hashmap (or 'h' is NULL). */
+int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned old_hash, new_hash, idx_old, idx_new;
+
+ if (!h)
+ return -ENOENT;
+
+ old_hash = bucket_hash(h, old_key);
+ idx_old = bucket_scan(h, old_hash, old_key);
+ if (idx_old == IDX_NIL)
+ return -ENOENT;
+
+ /* From here on use the key pointer actually stored, so the entry can be recognized by pointer identity. */
+ old_key = bucket_at(HASHMAP_BASE(h), idx_old)->key;
+
+ new_hash = bucket_hash(h, new_key);
+ idx_new = bucket_scan(h, new_hash, new_key);
+ if (idx_new != IDX_NIL)
+ /* Both keys resolving to the same bucket means there is only one entry to remove. */
+ if (idx_old != idx_new) {
+ remove_entry(h, idx_new);
+ /* Compensate for a possible backward shift. */
+ if (old_key != bucket_at(HASHMAP_BASE(h), idx_old)->key)
+ idx_old = prev_idx(HASHMAP_BASE(h), idx_old);
+ assert(old_key == bucket_at(HASHMAP_BASE(h), idx_old)->key);
+ }
+
+ remove_entry(h, idx_old);
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = new_key;
+ e->value = value;
+ /* At least one entry was removed above, so there is room without resizing. */
+ assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
+
+ return 0;
+}
+
+/* Removes the entry for 'key', but only if its current value (per entry_value()) is
+ * exactly 'value'. Returns 'value' if the entry was removed, NULL otherwise (key not
+ * found, value mismatch, or 'h' is NULL). */
+void *internal_hashmap_remove_value(HashmapBase *h, const void *key, void *value) {
+        unsigned found;
+
+        if (!h)
+                return NULL;
+
+        found = bucket_scan(h, bucket_hash(h, key), key);
+        if (found == IDX_NIL)
+                return NULL;
+
+        if (entry_value(h, bucket_at(h, found)) != value)
+                return NULL;
+
+        remove_entry(h, found);
+
+        return value;
+}
+
+/* Returns the bucket index of the first entry in iteration order, or IDX_NIL if 'h' is
+ * NULL or holds no entries. */
+static unsigned find_first_entry(HashmapBase *h) {
+        Iterator it = ITERATOR_FIRST;
+
+        if (h && n_entries(h) != 0)
+                return hashmap_iterate_entry(h, &it);
+
+        return IDX_NIL;
+}
+
+/* Returns the value of the first entry in iteration order and, if 'ret_key' is non-NULL,
+ * stores its key there. With 'remove' set the entry is also taken out of the hashmap.
+ * For an empty or NULL hashmap NULL is returned and '*ret_key' is set to NULL. */
+void *internal_hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key) {
+        void *key = NULL, *data = NULL;
+        unsigned first;
+
+        first = find_first_entry(h);
+        if (first != IDX_NIL) {
+                struct hashmap_base_entry *e = bucket_at(h, first);
+
+                /* Read both pointers before the entry is possibly removed. */
+                key = (void*) e->key;
+                data = entry_value(h, e);
+
+                if (remove)
+                        remove_entry(h, first);
+        }
+
+        if (ret_key)
+                *ret_key = key;
+
+        return data;
+}
+
+/* Returns the number of entries stored in 'h'; a NULL hashmap counts as empty. */
+unsigned internal_hashmap_size(HashmapBase *h) {
+        return h ? n_entries(h) : 0;
+}
+
+/* Returns the number of buckets currently available in 'h'; 0 for a NULL hashmap. */
+unsigned internal_hashmap_buckets(HashmapBase *h) {
+        return h ? n_buckets(h) : 0;
+}
+
+/* Adds every key/value pair of 'other' to 'h' using hashmap_put(). Keys already present in
+ * 'h' are kept as they are: the resulting -EEXIST is ignored. 'other' is not modified.
+ * Returns 0 on success, or the first other negative error from hashmap_put(), in which case
+ * entries added so far remain in 'h'. */
+int internal_hashmap_merge(Hashmap *h, Hashmap *other) {
+ Iterator i;
+ unsigned idx;
+
+ assert(h);
+
+ HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
+ struct plain_hashmap_entry *pe = plain_bucket_at(other, idx);
+ int r;
+
+ r = hashmap_put(h, pe->b.key, pe->value);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ return 0;
+}
+
+/* Adds every key of 'other' to 's' using set_put(). 'other' is not modified.
+ * Returns 0 on success, or the first negative error from set_put(), in which case keys
+ * added so far remain in 's'. */
+int set_merge(Set *s, Set *other) {
+ Iterator i;
+ unsigned idx;
+
+ assert(s);
+
+ HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
+ struct set_entry *se = set_bucket_at(other, idx);
+ int r;
+
+ r = set_put(s, se->b.key);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+/* Makes room for 'entries_add' additional entries up front.
+ * Returns 0 on success (whether or not a resize was needed), negative errno on failure. */
+int internal_hashmap_reserve(HashmapBase *h, unsigned entries_add) {
+        int r;
+
+        assert(h);
+
+        /* resize_buckets() reports 1 when it actually resized; callers only need success/failure. */
+        r = resize_buckets(h, entries_add);
+
+        return r < 0 ? r : 0;
+}
+
+/*
+ * The same as hashmap_merge(), but every new item from other is moved to h.
+ * Keys already in h are skipped and stay in other.
+ * Returns: 0 on success.
+ * -ENOMEM on alloc failure, in which case no move has been done.
+ */
+int internal_hashmap_move(HashmapBase *h, HashmapBase *other) {
+ struct swap_entries swap;
+ struct hashmap_base_entry *e, *n;
+ Iterator i;
+ unsigned idx;
+ int r;
+
+ assert(h);
+
+ /* A NULL source is treated as empty. */
+ if (!other)
+ return 0;
+
+ assert(other->type == h->type);
+
+ /*
+ * This reserves buckets for the worst case, where none of other's
+ * entries are yet present in h. This is preferable to risking
+ * an allocation failure in the middle of the moving and having to
+ * rollback or return a partial result.
+ */
+ r = resize_buckets(h, n_entries(other));
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_IDX(idx, other, i) {
+ unsigned h_hash;
+
+ e = bucket_at(other, idx);
+ h_hash = bucket_hash(h, e->key);
+ /* Key already in h: leave the entry in other. */
+ if (bucket_scan(h, h_hash, e->key) != IDX_NIL)
+ continue;
+
+ n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ n->key = e->key;
+ /* Sets store keys only; the other types also carry a value. */
+ if (h->type != HASHMAP_TYPE_SET)
+ ((struct plain_hashmap_entry*) n)->value =
+ ((struct plain_hashmap_entry*) e)->value;
+ /* Space was reserved above, so this is done without resizing and cannot fail. */
+ assert_se(hashmap_put_boldly(h, h_hash, &swap, false) == 1);
+
+ /* NOTE(review): removes the entry just visited while iterating 'other'; presumably the
+ * iterator tolerates removal of the current entry - confirm. */
+ remove_entry(other, idx);
+ }
+
+ return 0;
+}
+
+/* Moves the single entry for 'key' from 'other' to 'h'.
+ * Returns: 0 on success,
+ * -EEXIST if 'key' is already present in 'h',
+ * -ENOENT if 'other' is NULL or does not contain 'key',
+ * a negative error from hashmap_put_boldly() if 'h' could not be grown; 'other' is
+ * left unchanged in all error cases. */
+int internal_hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key) {
+ struct swap_entries swap;
+ unsigned h_hash, other_hash, idx;
+ struct hashmap_base_entry *e, *n;
+ int r;
+
+ assert(h);
+
+ h_hash = bucket_hash(h, key);
+ if (bucket_scan(h, h_hash, key) != IDX_NIL)
+ return -EEXIST;
+
+ if (!other)
+ return -ENOENT;
+
+ assert(other->type == h->type);
+
+ other_hash = bucket_hash(other, key);
+ idx = bucket_scan(other, other_hash, key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ e = bucket_at(other, idx);
+
+ /* Copy the stored key pointer (and the value, unless this is a set) into the swap slot. */
+ n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ n->key = e->key;
+ if (h->type != HASHMAP_TYPE_SET)
+ ((struct plain_hashmap_entry*) n)->value =
+ ((struct plain_hashmap_entry*) e)->value;
+ r = hashmap_put_boldly(h, h_hash, &swap, true);
+ if (r < 0)
+ return r;
+
+ /* Only drop the entry from the source once it is safely stored in h. */
+ remove_entry(other, idx);
+ return 0;
+}
+
+/* Creates a new hashmap of the same type and with the same hash_ops as 'h' and fills it
+ * with h's entries. The copy is shallow: key and value pointers are shared with 'h'.
+ * Returns the copy, or NULL if an allocation failed. */
+HashmapBase *internal_hashmap_copy(HashmapBase *h) {
+        HashmapBase *copy;
+        int r;
+
+        assert(h);
+
+        copy = hashmap_base_new(h->hash_ops, h->type HASHMAP_DEBUG_SRC_ARGS);
+        if (!copy)
+                return NULL;
+
+        switch (h->type) {
+        case HASHMAP_TYPE_PLAIN:
+        case HASHMAP_TYPE_ORDERED:
+                r = hashmap_merge((Hashmap*)copy, (Hashmap*)h);
+                break;
+        case HASHMAP_TYPE_SET:
+                r = set_merge((Set*)copy, (Set*)h);
+                break;
+        default:
+                assert_not_reached("Unknown hashmap type");
+        }
+
+        if (r < 0) {
+                /* The default destructors are function pointers, so "none" is spelled NULL
+                 * rather than the boolean 'false'.
+                 * NOTE(review): destructors set in hash_ops still run on the partially filled
+                 * copy, whose pointers are shared with 'h' - confirm that this is intended. */
+                internal_hashmap_free(copy, NULL, NULL);
+                return NULL;
+        }
+
+        return copy;
+}
+
+/* Returns a newly allocated, NULL-terminated array holding the entry_value() pointer of
+ * every entry, in iteration order. The pointed-to data is not duplicated: only the array
+ * itself is new. Returns NULL if the array could not be allocated.
+ * Note that 'h' is passed to n_entries() without a NULL check here. */
+char **internal_hashmap_get_strv(HashmapBase *h) {
+ char **sv;
+ Iterator i;
+ unsigned idx, n;
+
+ /* One extra slot for the terminating NULL. */
+ sv = new(char*, n_entries(h)+1);
+ if (!sv)
+ return NULL;
+
+ n = 0;
+ HASHMAP_FOREACH_IDX(idx, h, i)
+ sv[n++] = entry_value(h, bucket_at(h, idx));
+ sv[n] = NULL;
+
+ return sv;
+}
+
+/* Returns the value of the entry that follows the entry for 'key' in the iteration list.
+ * Returns NULL if 'h' is NULL, 'key' is not found, or the entry is the last one. */
+void *ordered_hashmap_next(OrderedHashmap *h, const void *key) {
+        struct ordered_hashmap_entry *cur;
+        unsigned found;
+
+        if (!h)
+                return NULL;
+
+        found = bucket_scan(h, bucket_hash(h, key), key);
+        if (found == IDX_NIL)
+                return NULL;
+
+        cur = ordered_bucket_at(h, found);
+
+        return cur->iterate_next == IDX_NIL ? NULL : ordered_bucket_at(h, cur->iterate_next)->p.value;
+}
+
+/* Puts 'value' into set 's' and takes ownership of it in every case: if set_put() did not
+ * store it (an equal key already exists, or an error occurred), 'value' is freed.
+ * Returns the result of set_put(). */
+int set_consume(Set *s, void *value) {
+        int r;
+
+        assert(s);
+        assert(value);
+
+        r = set_put(s, value);
+        if (r > 0)
+                return r;
+
+        /* Not stored: release the object on behalf of the caller. */
+        free(value);
+
+        return r;
+}
+
+/* Adds a heap-allocated copy of string 'p' to set 's'.
+ * Returns: 0 if 'p' is already contained (nothing is allocated),
+ * -ENOMEM if the string could not be duplicated,
+ * otherwise the result of set_consume() for the copy. */
+int set_put_strdup(Set *s, const char *p) {
+ char *c;
+
+ assert(s);
+ assert(p);
+
+ /* Check first, to avoid a pointless strdup()/free() round trip for known strings. */
+ if (set_contains(s, (char*) p))
+ return 0;
+
+ c = strdup(p);
+ if (!c)
+ return -ENOMEM;
+
+ return set_consume(s, c);
+}
+
+/* Calls set_put_strdup() for every string of the string list 'l'.
+ * Returns the sum of the individual non-negative results, or the first negative error, in
+ * which case strings added so far remain in the set. */
+int set_put_strdupv(Set *s, char **l) {
+ int n = 0, r;
+ char **i;
+
+ assert(s);
+
+ STRV_FOREACH(i, l) {
+ r = set_put_strdup(s, *i);
+ if (r < 0)
+ return r;
+
+ n += r;
+ }
+
+ return n;
+}
+
+/* Splits 'v' into words with extract_first_word() (using 'separators' and 'flags') and
+ * hands each extracted word to set_consume().
+ * Returns the non-positive result of extract_first_word() that ended the loop, or the first
+ * negative error from set_consume(); words added so far remain in the set. */
+int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags) {
+ const char *p = v;
+ int r;
+
+ assert(s);
+ assert(v);
+
+ for (;;) {
+ char *word;
+
+ r = extract_first_word(&p, &word, separators, flags);
+ if (r <= 0)
+ return r;
+
+ /* set_consume() frees 'word' itself when it does not end up in the set. */
+ r = set_consume(s, word);
+ if (r < 0)
+ return r;
+ }
+}
+
+/* Grows the cache memory to hold at least 'size' elements if necessary and marks it active.
+ * Returns: true (1) if the memory was inactive before this call,
+ *          false (0) if it was active already,
+ *          -ENOMEM if growing failed for a non-zero 'size'. */
+static int cachemem_maintain(CacheMem *mem, unsigned size) {
+        bool was_active;
+
+        assert(mem);
+
+        /* A failed "allocation" of zero elements is not an error. */
+        if (!GREEDY_REALLOC(mem->ptr, mem->n_allocated, size) && size > 0)
+                return -ENOMEM;
+
+        was_active = mem->active;
+        mem->active = true;
+
+        return !was_active;
+}
+
+/* Provides arrays with the keys and/or values of the cached hashmap in iteration order.
+ * Each of 'res_keys', 'res_values' and 'res_n_entries' may be NULL if that output is not
+ * wanted. The returned arrays point into memory owned by the cache; they are refreshed only
+ * when the hashmap is marked dirty or when an array was not active in the previous call.
+ * Returns 0 on success, -ENOMEM if a cache array could not be grown. */
+int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries) {
+ bool sync_keys = false, sync_values = false;
+ unsigned size;
+ int r;
+
+ assert(cache);
+ assert(cache->hashmap);
+
+ size = n_entries(cache->hashmap);
+
+ if (res_keys) {
+ r = cachemem_maintain(&cache->keys, size);
+ if (r < 0)
+ return r;
+
+ /* A newly (re)activated array has to be filled from scratch. */
+ sync_keys = r;
+ } else
+ /* Not requested: deactivate, so it gets resynced whenever it is requested again. */
+ cache->keys.active = false;
+
+ if (res_values) {
+ r = cachemem_maintain(&cache->values, size);
+ if (r < 0)
+ return r;
+
+ sync_values = r;
+ } else
+ cache->values.active = false;
+
+ /* The hashmap changed since the last sync: refresh every active array and clear the flag. */
+ if (cache->hashmap->dirty) {
+ if (cache->keys.active)
+ sync_keys = true;
+ if (cache->values.active)
+ sync_values = true;
+
+ cache->hashmap->dirty = false;
+ }
+
+ if (sync_keys || sync_values) {
+ unsigned i, idx;
+ Iterator iter;
+
+ i = 0;
+ HASHMAP_FOREACH_IDX(idx, cache->hashmap, iter) {
+ struct hashmap_base_entry *e;
+
+ e = bucket_at(cache->hashmap, idx);
+
+ if (sync_keys)
+ cache->keys.ptr[i] = e->key;
+ if (sync_values)
+ cache->values.ptr[i] = entry_value(cache->hashmap, e);
+ i++;
+ }
+ }
+
+ if (res_keys)
+ *res_keys = cache->keys.ptr;
+ if (res_values)
+ *res_values = cache->values.ptr;
+ if (res_n_entries)
+ *res_n_entries = size;
+
+ return 0;
+}
+
+/* Frees the cache object together with its key and value arrays. The hashmap it refers to
+ * is not touched. A NULL 'cache' is accepted. Always returns NULL. */
+IteratedCache *iterated_cache_free(IteratedCache *cache) {
+        if (!cache)
+                return NULL;
+
+        free(cache->keys.ptr);
+        free(cache->values.ptr);
+        free(cache);
+
+        return NULL;
+}
diff --git a/src/basic/hashmap.h b/src/basic/hashmap.h
new file mode 100644
index 0000000..e16a9f9
--- /dev/null
+++ b/src/basic/hashmap.h
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "hash-funcs.h"
+#include "macro.h"
+#include "util.h"
+
+/*
+ * A hash table implementation. As a minor optimization a NULL hashmap object
+ * will be treated as empty hashmap for all read operations. That way it is not
+ * necessary to instantiate an object for each Hashmap use.
+ *
+ * If ENABLE_DEBUG_HASHMAP is defined (by configuring with --enable-debug=hashmap),
+ * the implementation will:
+ * - store extra data for debugging and statistics (see tools/gdb-sd_dump_hashmaps.py)
+ * - perform extra checks for invalid use of iterators
+ */
+
+#define HASH_KEY_SIZE 16 /* NOTE(review): presumably the size in bytes of the hash key (seed) - confirm against hash-funcs.h */
+
+typedef void* (*hashmap_destroy_t)(void *p); /* callback type taking and returning a void pointer; going by its name, used to destroy stored objects */
+
+/* The base type for all hashmap and set types. Many functions in the
+ * implementation take (HashmapBase*) parameters and are run-time polymorphic,
+ * though the API is not meant to be polymorphic (do not call functions
+ * internal_*() directly). */
+typedef struct HashmapBase HashmapBase;
+
+/* Specific hashmap/set types */
+typedef struct Hashmap Hashmap; /* Maps keys to values */
+typedef struct OrderedHashmap OrderedHashmap; /* Like Hashmap, but also remembers entry insertion order */
+typedef struct Set Set; /* Stores just keys */
+
+typedef struct IteratedCache IteratedCache; /* Caches the iterated order of one of the above */
+
+/* Ideally the Iterator would be an opaque struct, but it is instantiated
+ * by hashmap users, so the definition has to be here. Do not use its fields
+ * directly. */
+typedef struct {
+ unsigned idx; /* index of an entry to be iterated next */
+ const void *next_key; /* expected value of that entry's key pointer */
+#if ENABLE_DEBUG_HASHMAP
+ unsigned put_count; /* hashmap's put_count recorded at start of iteration */
+ unsigned rem_count; /* hashmap's rem_count in previous iteration */
+ unsigned prev_idx; /* idx in previous iteration */
+#endif
+} Iterator;
+
+#define _IDX_ITERATOR_FIRST (UINT_MAX - 1) /* idx value of an iterator that has not been advanced yet */
+#define ITERATOR_FIRST ((Iterator) { .idx = _IDX_ITERATOR_FIRST, .next_key = NULL }) /* initial iterator state; this is what the *_FOREACH macros start from */
+
+/* Macros for type checking. This one is a compile-time test: is 'h' a pointer to HashmapBase, Hashmap, OrderedHashmap or Set? */
+#define PTR_COMPATIBLE_WITH_HASHMAP_BASE(h) \
+ (__builtin_types_compatible_p(typeof(h), HashmapBase*) || \
+ __builtin_types_compatible_p(typeof(h), Hashmap*) || \
+ __builtin_types_compatible_p(typeof(h), OrderedHashmap*) || \
+ __builtin_types_compatible_p(typeof(h), Set*))
+
+/* Compile-time test: is 'h' a pointer to one of the key/value types, i.e. Hashmap or OrderedHashmap
+ * (a Set or HashmapBase pointer does not qualify)? */
+#define PTR_COMPATIBLE_WITH_PLAIN_HASHMAP(h) \
+        (__builtin_types_compatible_p(typeof(h), Hashmap*) || \
+         __builtin_types_compatible_p(typeof(h), OrderedHashmap*))
+
+#define HASHMAP_BASE(h) \
+ __builtin_choose_expr(PTR_COMPATIBLE_WITH_HASHMAP_BASE(h), \
+ (HashmapBase*)(h), \
+ (void)0) /* casts any hashmap/set pointer to HashmapBase*; any other argument yields a void expression instead */
+
+#define PLAIN_HASHMAP(h) \
+ __builtin_choose_expr(PTR_COMPATIBLE_WITH_PLAIN_HASHMAP(h), \
+ (Hashmap*)(h), \
+ (void)0) /* casts a Hashmap* or OrderedHashmap* to Hashmap*; any other argument yields a void expression instead */
+/* With ENABLE_DEBUG_HASHMAP the functions below take three extra parameters (function, file, line of the caller). These macros splice them into parameter and argument lists, and expand to nothing otherwise. */
+#if ENABLE_DEBUG_HASHMAP
+# define HASHMAP_DEBUG_PARAMS , const char *func, const char *file, int line
+# define HASHMAP_DEBUG_SRC_ARGS , __func__, __FILE__, __LINE__
+# define HASHMAP_DEBUG_PASS_ARGS , func, file, line
+#else
+# define HASHMAP_DEBUG_PARAMS
+# define HASHMAP_DEBUG_SRC_ARGS
+# define HASHMAP_DEBUG_PASS_ARGS
+#endif
+/* Constructors. The hashmap_new()/ordered_hashmap_new() macros supply the debug arguments automatically. */
+Hashmap *internal_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+OrderedHashmap *internal_ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+#define hashmap_new(ops) internal_hashmap_new(ops HASHMAP_DEBUG_SRC_ARGS)
+#define ordered_hashmap_new(ops) internal_ordered_hashmap_new(ops HASHMAP_DEBUG_SRC_ARGS)
+/* Destructors. All wrappers forward to internal_hashmap_free() and return its result; they differ only in which default free functions they pass for keys and values. The plain variants pass none. */
+HashmapBase *internal_hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value);
+static inline Hashmap *hashmap_free(Hashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), NULL, NULL);
+}
+static inline OrderedHashmap *ordered_hashmap_free(OrderedHashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), NULL, NULL);
+}
+/* *_free_free(): free() is passed as default free function for the values only */
+static inline Hashmap *hashmap_free_free(Hashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), NULL, free);
+}
+static inline OrderedHashmap *ordered_hashmap_free_free(OrderedHashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), NULL, free);
+}
+/* *_free_free_key(): free() is passed as default free function for the keys only */
+static inline Hashmap *hashmap_free_free_key(Hashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), free, NULL);
+}
+static inline OrderedHashmap *ordered_hashmap_free_free_key(OrderedHashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), free, NULL);
+}
+/* *_free_free_free(): free() is passed as default free function for both keys and values */
+static inline Hashmap *hashmap_free_free_free(Hashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), free, free);
+}
+static inline OrderedHashmap *ordered_hashmap_free_free_free(OrderedHashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), free, free);
+}
+/* IteratedCache accessors: release a cache, and fetch its key/value arrays plus the entry count */
+IteratedCache *iterated_cache_free(IteratedCache *cache);
+int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries);
+/* Copying: type-preserving wrappers around internal_hashmap_copy() */
+HashmapBase *internal_hashmap_copy(HashmapBase *h);
+static inline Hashmap *hashmap_copy(Hashmap *h) {
+ return (Hashmap*) internal_hashmap_copy(HASHMAP_BASE(h));
+}
+static inline OrderedHashmap *ordered_hashmap_copy(OrderedHashmap *h) {
+ return (OrderedHashmap*) internal_hashmap_copy(HASHMAP_BASE(h));
+}
+/* These take the address of the hashmap pointer. NOTE(review): presumably a hashmap is only allocated if *h is still NULL - confirm in the implementation. */
+int internal_hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+int internal_ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+#define hashmap_ensure_allocated(h, ops) internal_hashmap_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS)
+#define ordered_hashmap_ensure_allocated(h, ops) internal_ordered_hashmap_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS)
+/* Creates an IteratedCache for the given hashmap; the matching destructor is iterated_cache_free() */
+IteratedCache *internal_hashmap_iterated_cache_new(HashmapBase *h);
+static inline IteratedCache *hashmap_iterated_cache_new(Hashmap *h) {
+ return (IteratedCache*) internal_hashmap_iterated_cache_new(HASHMAP_BASE(h));
+}
+static inline IteratedCache *ordered_hashmap_iterated_cache_new(OrderedHashmap *h) {
+ return (IteratedCache*) internal_hashmap_iterated_cache_new(HASHMAP_BASE(h));
+}
+/* Insertion and lookup. Each ordered_*() variant is a type-checked wrapper that forwards to the corresponding Hashmap or internal_*() function. */
+int hashmap_put(Hashmap *h, const void *key, void *value);
+static inline int ordered_hashmap_put(OrderedHashmap *h, const void *key, void *value) {
+ return hashmap_put(PLAIN_HASHMAP(h), key, value);
+}
+
+int hashmap_update(Hashmap *h, const void *key, void *value);
+static inline int ordered_hashmap_update(OrderedHashmap *h, const void *key, void *value) {
+ return hashmap_update(PLAIN_HASHMAP(h), key, value);
+}
+
+int hashmap_replace(Hashmap *h, const void *key, void *value);
+static inline int ordered_hashmap_replace(OrderedHashmap *h, const void *key, void *value) {
+ return hashmap_replace(PLAIN_HASHMAP(h), key, value);
+}
+
+void *internal_hashmap_get(HashmapBase *h, const void *key);
+static inline void *hashmap_get(Hashmap *h, const void *key) {
+ return internal_hashmap_get(HASHMAP_BASE(h), key);
+}
+static inline void *ordered_hashmap_get(OrderedHashmap *h, const void *key) {
+ return internal_hashmap_get(HASHMAP_BASE(h), key);
+}
+/* Like *_get(), with an additional output parameter 'rkey'. NOTE(review): presumably receives the stored key pointer - confirm in the implementation. */
+void *hashmap_get2(Hashmap *h, const void *key, void **rkey);
+static inline void *ordered_hashmap_get2(OrderedHashmap *h, const void *key, void **rkey) {
+ return hashmap_get2(PLAIN_HASHMAP(h), key, rkey);
+}
+/* Membership test and removal: thin type-checked wrappers around the functions declared next to them */
+bool internal_hashmap_contains(HashmapBase *h, const void *key);
+static inline bool hashmap_contains(Hashmap *h, const void *key) {
+ return internal_hashmap_contains(HASHMAP_BASE(h), key);
+}
+static inline bool ordered_hashmap_contains(OrderedHashmap *h, const void *key) {
+ return internal_hashmap_contains(HASHMAP_BASE(h), key);
+}
+
+void *internal_hashmap_remove(HashmapBase *h, const void *key);
+static inline void *hashmap_remove(Hashmap *h, const void *key) {
+ return internal_hashmap_remove(HASHMAP_BASE(h), key);
+}
+static inline void *ordered_hashmap_remove(OrderedHashmap *h, const void *key) {
+ return internal_hashmap_remove(HASHMAP_BASE(h), key);
+}
+/* Like *_remove(), with an additional output parameter 'rkey' */
+void *hashmap_remove2(Hashmap *h, const void *key, void **rkey);
+static inline void *ordered_hashmap_remove2(OrderedHashmap *h, const void *key, void **rkey) {
+ return hashmap_remove2(PLAIN_HASHMAP(h), key, rkey);
+}
+/* Removal that takes both key and value. NOTE(review): presumably removes only if the stored value matches - confirm in the implementation. */
+void *internal_hashmap_remove_value(HashmapBase *h, const void *key, void *value);
+static inline void *hashmap_remove_value(Hashmap *h, const void *key, void *value) {
+ return internal_hashmap_remove_value(HASHMAP_BASE(h), key, value);
+}
+
+/* OrderedHashmap flavour of hashmap_remove_value(): forwards to the shared implementation */
+static inline void *ordered_hashmap_remove_value(OrderedHashmap *h, const void *key, void *value) {
+        return internal_hashmap_remove_value(HASHMAP_BASE(h), key, value);
+}
+/* Combined operations taking an old key, a new key and a value; the ordered_*() variants forward to the Hashmap functions */
+int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value);
+static inline int ordered_hashmap_remove_and_put(OrderedHashmap *h, const void *old_key, const void *new_key, void *value) {
+ return hashmap_remove_and_put(PLAIN_HASHMAP(h), old_key, new_key, value);
+}
+
+int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value);
+static inline int ordered_hashmap_remove_and_replace(OrderedHashmap *h, const void *old_key, const void *new_key, void *value) {
+ return hashmap_remove_and_replace(PLAIN_HASHMAP(h), old_key, new_key, value);
+}
+
+/* Since merging data from a OrderedHashmap into a Hashmap or vice-versa
+ * should just work, allow this by having looser type-checking here.
+ * Both arguments go through PLAIN_HASHMAP(), so either may be a Hashmap* or
+ * an OrderedHashmap*. */
+int internal_hashmap_merge(Hashmap *h, Hashmap *other);
+#define hashmap_merge(h, other) internal_hashmap_merge(PLAIN_HASHMAP(h), PLAIN_HASHMAP(other))
+#define ordered_hashmap_merge(h, other) hashmap_merge(h, other)
+/* NOTE(review): by its name and parameter, reserves room for 'entries_add' further entries - confirm in the implementation */
+int internal_hashmap_reserve(HashmapBase *h, unsigned entries_add);
+static inline int hashmap_reserve(Hashmap *h, unsigned entries_add) {
+ return internal_hashmap_reserve(HASHMAP_BASE(h), entries_add);
+}
+static inline int ordered_hashmap_reserve(OrderedHashmap *h, unsigned entries_add) {
+ return internal_hashmap_reserve(HASHMAP_BASE(h), entries_add);
+}
+
+int internal_hashmap_move(HashmapBase *h, HashmapBase *other);
+/* Unlike hashmap_merge, hashmap_move does not allow mixing the types. */
+static inline int hashmap_move(Hashmap *h, Hashmap *other) {
+ return internal_hashmap_move(HASHMAP_BASE(h), HASHMAP_BASE(other));
+}
+static inline int ordered_hashmap_move(OrderedHashmap *h, OrderedHashmap *other) {
+ return internal_hashmap_move(HASHMAP_BASE(h), HASHMAP_BASE(other));
+}
+/* Same as *_move(), but restricted to the single entry identified by 'key'; both hashmaps must again be of the same type */
+int internal_hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key);
+static inline int hashmap_move_one(Hashmap *h, Hashmap *other, const void *key) {
+ return internal_hashmap_move_one(HASHMAP_BASE(h), HASHMAP_BASE(other), key);
+}
+static inline int ordered_hashmap_move_one(OrderedHashmap *h, OrderedHashmap *other, const void *key) {
+ return internal_hashmap_move_one(HASHMAP_BASE(h), HASHMAP_BASE(other), key);
+}
+/* Size queries. The *_isempty() helpers are defined as "size is zero". */
+unsigned internal_hashmap_size(HashmapBase *h) _pure_;
+static inline unsigned hashmap_size(Hashmap *h) {
+ return internal_hashmap_size(HASHMAP_BASE(h));
+}
+static inline unsigned ordered_hashmap_size(OrderedHashmap *h) {
+ return internal_hashmap_size(HASHMAP_BASE(h));
+}
+
+static inline bool hashmap_isempty(Hashmap *h) {
+ return hashmap_size(h) == 0;
+}
+static inline bool ordered_hashmap_isempty(OrderedHashmap *h) {
+ return ordered_hashmap_size(h) == 0;
+}
+
+unsigned internal_hashmap_buckets(HashmapBase *h) _pure_;
+static inline unsigned hashmap_buckets(Hashmap *h) {
+ return internal_hashmap_buckets(HASHMAP_BASE(h));
+}
+static inline unsigned ordered_hashmap_buckets(OrderedHashmap *h) {
+ return internal_hashmap_buckets(HASHMAP_BASE(h));
+}
+/* Single iteration step; this is what the *_FOREACH macros call in their loop condition. 'i' carries the iteration state between calls. */
+bool internal_hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key);
+static inline bool hashmap_iterate(Hashmap *h, Iterator *i, void **value, const void **key) {
+ return internal_hashmap_iterate(HASHMAP_BASE(h), i, value, key);
+}
+static inline bool ordered_hashmap_iterate(OrderedHashmap *h, Iterator *i, void **value, const void **key) {
+ return internal_hashmap_iterate(HASHMAP_BASE(h), i, value, key);
+}
+/* Clearing. All wrappers forward to internal_hashmap_clear(); they differ only in which default free functions they pass for keys and values. The plain variants pass none. */
+void internal_hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value);
+static inline void hashmap_clear(Hashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), NULL, NULL);
+}
+static inline void ordered_hashmap_clear(OrderedHashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), NULL, NULL);
+}
+/* *_clear_free(): free() is passed as default free function for the values only */
+static inline void hashmap_clear_free(Hashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), NULL, free);
+}
+static inline void ordered_hashmap_clear_free(OrderedHashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), NULL, free);
+}
+/* *_clear_free_key(): free() is passed as default free function for the keys only */
+static inline void hashmap_clear_free_key(Hashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), free, NULL);
+}
+static inline void ordered_hashmap_clear_free_key(OrderedHashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), free, NULL);
+}
+/* *_clear_free_free(): free() is passed as default free function for both keys and values */
+static inline void hashmap_clear_free_free(Hashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), free, free);
+}
+static inline void ordered_hashmap_clear_free_free(OrderedHashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), free, free);
+}
+
+/*
+ * Note about all *_first*() functions
+ *
+ * For plain Hashmaps and Sets the order of entries is undefined.
+ * The functions find whatever entry is first in the implementation
+ * internal order.
+ *
+ * Only for OrderedHashmaps the order is well defined and finding
+ * the first entry is O(1).
+ *
+ * The *_steal_*() variants pass remove=true to the internal function,
+ * the others pass remove=false.
+ */
+
+void *internal_hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key);
+static inline void *hashmap_steal_first_key_and_value(Hashmap *h, void **ret) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), true, ret);
+}
+static inline void *ordered_hashmap_steal_first_key_and_value(OrderedHashmap *h, void **ret) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), true, ret);
+}
+static inline void *hashmap_first_key_and_value(Hashmap *h, void **ret) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), false, ret);
+}
+static inline void *ordered_hashmap_first_key_and_value(OrderedHashmap *h, void **ret) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), false, ret);
+}
+/* Variants that do not ask for the key: NULL is passed as ret_key */
+static inline void *hashmap_steal_first(Hashmap *h) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), true, NULL);
+}
+static inline void *ordered_hashmap_steal_first(OrderedHashmap *h) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), true, NULL);
+}
+static inline void *hashmap_first(Hashmap *h) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), false, NULL);
+}
+static inline void *ordered_hashmap_first(OrderedHashmap *h) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), false, NULL);
+}
+/* Variants that return only the key: the call's own result is discarded and the key obtained via the output parameter is returned (NULL if the call left it untouched) */
+static inline void *internal_hashmap_first_key(HashmapBase *h, bool remove) {
+ void *key = NULL;
+
+ (void) internal_hashmap_first_key_and_value(HASHMAP_BASE(h), remove, &key);
+ return key;
+}
+static inline void *hashmap_steal_first_key(Hashmap *h) {
+ return internal_hashmap_first_key(HASHMAP_BASE(h), true);
+}
+static inline void *ordered_hashmap_steal_first_key(OrderedHashmap *h) {
+ return internal_hashmap_first_key(HASHMAP_BASE(h), true);
+}
+static inline void *hashmap_first_key(Hashmap *h) {
+ return internal_hashmap_first_key(HASHMAP_BASE(h), false);
+}
+static inline void *ordered_hashmap_first_key(OrderedHashmap *h) {
+ return internal_hashmap_first_key(HASHMAP_BASE(h), false);
+}
+/* Statement-expression macros: *_clear_with_destructor() repeatedly takes an item out via *_steal_first() and calls _f on it, until *_steal_first() returns NULL. *_free_with_destructor() does the same and then evaluates to the result of *_free(). */
+#define hashmap_clear_with_destructor(_s, _f) \
+ ({ \
+ void *_item; \
+ while ((_item = hashmap_steal_first(_s))) \
+ _f(_item); \
+ })
+#define hashmap_free_with_destructor(_s, _f) \
+ ({ \
+ hashmap_clear_with_destructor(_s, _f); \
+ hashmap_free(_s); \
+ })
+#define ordered_hashmap_clear_with_destructor(_s, _f) \
+ ({ \
+ void *_item; \
+ while ((_item = ordered_hashmap_steal_first(_s))) \
+ _f(_item); \
+ })
+#define ordered_hashmap_free_with_destructor(_s, _f) \
+ ({ \
+ ordered_hashmap_clear_with_destructor(_s, _f); \
+ ordered_hashmap_free(_s); \
+ })
+
+/* no hashmap_next: only the OrderedHashmap type offers a "next" lookup relative to a key */
+void *ordered_hashmap_next(OrderedHashmap *h, const void *key);
+/* NOTE(review): by its name and return type, returns the hashmap's contents as a string array - confirm which part (keys or values) and the ownership rules in the implementation */
+char **internal_hashmap_get_strv(HashmapBase *h);
+static inline char **hashmap_get_strv(Hashmap *h) {
+ return internal_hashmap_get_strv(HASHMAP_BASE(h));
+}
+static inline char **ordered_hashmap_get_strv(OrderedHashmap *h) {
+ return internal_hashmap_get_strv(HASHMAP_BASE(h));
+}
+
+/*
+ * Hashmaps are iterated in unpredictable order.
+ * OrderedHashmaps are an exception to this. They are iterated in the order
+ * the entries were inserted.
+ * It is safe to remove the current entry.
+ *
+ * Arguments: 'e' is the variable receiving each value, 'k' (in the *_KEY
+ * variants) the variable receiving each key, 'h' the hashmap and 'i' an
+ * Iterator variable, which the macro resets to ITERATOR_FIRST.
+ */
+#define HASHMAP_FOREACH(e, h, i) \
+ for ((i) = ITERATOR_FIRST; hashmap_iterate((h), &(i), (void**)&(e), NULL); )
+
+#define ORDERED_HASHMAP_FOREACH(e, h, i) \
+ for ((i) = ITERATOR_FIRST; ordered_hashmap_iterate((h), &(i), (void**)&(e), NULL); )
+
+#define HASHMAP_FOREACH_KEY(e, k, h, i) \
+ for ((i) = ITERATOR_FIRST; hashmap_iterate((h), &(i), (void**)&(e), (const void**) &(k)); )
+
+#define ORDERED_HASHMAP_FOREACH_KEY(e, k, h, i) \
+ for ((i) = ITERATOR_FIRST; ordered_hashmap_iterate((h), &(i), (void**)&(e), (const void**) &(k)); )
+/* Scope-exit cleanup helpers. NOTE(review): DEFINE_TRIVIAL_CLEANUP_FUNC() presumably defines the <func>p() functions that the _cleanup_*_ macros below refer to - confirm in macro.h. */
+DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free_free);
+/* Variable attributes selecting one of the cleanup functions above */
+#define _cleanup_hashmap_free_ _cleanup_(hashmap_freep)
+#define _cleanup_hashmap_free_free_ _cleanup_(hashmap_free_freep)
+#define _cleanup_hashmap_free_free_free_ _cleanup_(hashmap_free_free_freep)
+#define _cleanup_ordered_hashmap_free_ _cleanup_(ordered_hashmap_freep)
+#define _cleanup_ordered_hashmap_free_free_ _cleanup_(ordered_hashmap_free_freep)
+#define _cleanup_ordered_hashmap_free_free_free_ _cleanup_(ordered_hashmap_free_free_freep)
+/* The same for IteratedCache objects */
+DEFINE_TRIVIAL_CLEANUP_FUNC(IteratedCache*, iterated_cache_free);
+
+#define _cleanup_iterated_cache_free_ _cleanup_(iterated_cache_freep)
diff --git a/src/basic/hexdecoct.c b/src/basic/hexdecoct.c
new file mode 100644
index 0000000..c0f9640
--- /dev/null
+++ b/src/basic/hexdecoct.c
@@ -0,0 +1,819 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "string-util.h"
+#include "util.h"
+
+/* Returns the octal digit character for the lowest three bits of x. */
+char octchar(int x) {
+        static const char digits[8] = "01234567";
+
+        return digits[x & 7];
+}
+
+/* Returns the value (0..7) of the octal digit c, or -EINVAL if c is not an octal digit. */
+int unoctchar(char c) {
+        return c < '0' || c > '7' ? -EINVAL : c - '0';
+}
+
+/* Returns the decimal digit character for the last decimal digit of x. */
+char decchar(int x) {
+        int digit = x % 10;
+
+        return '0' + digit;
+}
+
+/* Returns the value (0..9) of the decimal digit c, or -EINVAL if c is not a decimal digit. */
+int undecchar(char c) {
+        return c < '0' || c > '9' ? -EINVAL : c - '0';
+}
+
+/* Returns the lowercase hexadecimal digit character for the lowest four bits of x. */
+char hexchar(int x) {
+        int nibble = x & 15;
+
+        return nibble < 10 ? '0' + nibble : 'a' + (nibble - 10);
+}
+
+/* Returns the value (0..15) of the hexadecimal digit c, accepting both upper and lower case, or
+ * -EINVAL if c is not a hexadecimal digit. */
+int unhexchar(char c) {
+        if (c >= 'A' && c <= 'F')
+                return 10 + (c - 'A');
+
+        if (c >= 'a' && c <= 'f')
+                return 10 + (c - 'a');
+
+        if (c >= '0' && c <= '9')
+                return c - '0';
+
+        return -EINVAL;
+}
+
+/* Formats the l bytes at p as a lowercase hexadecimal string, two digits per byte and without
+ * separators. p may only be NULL if l is 0. Returns a newly allocated, NUL-terminated string that
+ * the caller has to free(), or NULL if it could not be allocated. */
+char *hexmem(const void *p, size_t l) {
+        const uint8_t *x = p;
+        char *r, *z;
+        size_t i;
+
+        assert(p || l == 0);
+
+        /* "l * 2 + 1" must not wrap around, otherwise the buffer would be too small */
+        if (l > (SIZE_MAX - 1) / 2)
+                return NULL;
+
+        z = r = new(char, l * 2 + 1);
+        if (!r)
+                return NULL;
+
+        for (i = 0; i < l; i++) {
+                *(z++) = hexchar(x[i] >> 4);
+                *(z++) = hexchar(x[i] & 15);
+        }
+
+        *z = 0;
+        return r;
+}
+/* Consumes one hex digit from the buffer described by *p (current position) and *l (bytes left),
+ * advancing both. Returns the digit's value, -EPIPE if the buffer holds nothing but whitespace
+ * (or is empty), or the negative error from unhexchar() if the next non-whitespace character is
+ * not a hex digit. */
+static int unhex_next(const char **p, size_t *l) {
+ int r;
+
+ assert(p);
+ assert(l);
+
+ /* Find the next non-whitespace character, and decode it. We
+ * greedily skip all preceding and all following whitespace. */
+
+ for (;;) {
+ if (*l == 0)
+ return -EPIPE;
+
+ if (!strchr(WHITESPACE, **p))
+ break;
+
+ /* Skip leading whitespace */
+ (*p)++, (*l)--;
+ }
+ /* *p now points at a character that is not in WHITESPACE */
+ r = unhexchar(**p);
+ if (r < 0)
+ return r;
+ /* Step over the digit, then over any whitespace directly following it */
+ for (;;) {
+ (*p)++, (*l)--;
+
+ if (*l == 0 || !strchr(WHITESPACE, **p))
+ break;
+
+ /* Skip following whitespace */
+ }
+
+ return r;
+}
+
+/* Decodes the hexadecimal text at p (l characters, or the whole string if l is (size_t) -1) into a
+ * newly allocated buffer. Whitespace between digits is ignored. On success returns 0, stores the
+ * buffer in *ret (with an extra NUL byte appended) and the number of decoded bytes in *ret_len.
+ * On failure returns a negative errno-style error and leaves both outputs untouched. */
+int unhexmem(const char *p, size_t l, void **ret, size_t *ret_len) {
+        _cleanup_free_ uint8_t *buf = NULL;
+        const char *in = p;
+        uint8_t *out;
+        int hi, lo;
+
+        assert(ret);
+        assert(ret_len);
+        assert(p || l == 0);
+
+        if (l == (size_t) -1)
+                l = strlen(p);
+
+        /* This is an upper bound only, since whitespace is skipped while decoding */
+        buf = malloc((l + 1) / 2 + 1);
+        if (!buf)
+                return -ENOMEM;
+
+        out = buf;
+
+        /* -EPIPE for the first digit of a pair means: end of input reached */
+        while ((hi = unhex_next(&in, &l)) != -EPIPE) {
+                if (hi < 0)
+                        return hi;
+
+                lo = unhex_next(&in, &l);
+                if (lo < 0)
+                        return lo;
+
+                *(out++) = (uint8_t) hi << 4 | (uint8_t) lo;
+        }
+
+        *out = 0;
+
+        *ret_len = (size_t) (out - buf);
+        *ret = TAKE_PTR(buf);
+
+        return 0;
+}
+
+/* https://tools.ietf.org/html/rfc4648#section-6
+ * Notice that base32hex differs from base32 in the alphabet it uses.
+ * The distinction is that the base32hex representation preserves the
+ * order of the underlying data when compared as bytestrings, this is
+ * useful when representing NSEC3 hashes, as one can then verify the
+ * order of hashes directly from their representation.
+ *
+ * Returns the base32hex character ('0'-'9', 'A'-'V') for the lowest five bits of x. */
+char base32hexchar(int x) {
+        int v = x & 31;
+
+        return v < 10 ? '0' + v : 'A' + (v - 10);
+}
+
+/* Returns the value (0..31) of the base32hex character c ('0'-'9', 'A'-'V'), or -EINVAL if c is
+ * not part of that alphabet. */
+int unbase32hexchar(char c) {
+        if (c >= 'A' && c <= 'V')
+                return 10 + (c - 'A');
+
+        if (c >= '0' && c <= '9')
+                return c - '0';
+
+        return -EINVAL;
+}
+
+/* Encodes one group of n input bytes (1 <= n <= 5) at x as base32hex, writing the characters to z.
+ * If 'padding' is set, an incomplete group is filled up to eight characters with '='. Returns the
+ * position following the last character written. */
+static char *base32hex_encode_group(const uint8_t *x, size_t n, bool padding, char *z) {
+        /* Number of characters that carry data, indexed by the number of input bytes */
+        static const uint8_t n_chars[6] = { 0, 2, 4, 5, 7, 8 };
+        uint8_t b[5] = { 0 };
+        char c[8];
+        size_t i;
+
+        assert(n >= 1 && n <= 5);
+
+        /* Bytes missing from an incomplete group count as zero bits */
+        for (i = 0; i < n; i++)
+                b[i] = x[i];
+
+        /* b[0] == XXXXXXXX; b[1] == YYYYYYYY; b[2] == ZZZZZZZZ
+         * b[3] == QQQQQQQQ; b[4] == WWWWWWWW */
+        c[0] = base32hexchar(b[0] >> 3);                    /* 000XXXXX */
+        c[1] = base32hexchar((b[0] & 7) << 2 | b[1] >> 6);  /* 000XXXYY */
+        c[2] = base32hexchar((b[1] & 63) >> 1);             /* 000YYYYY */
+        c[3] = base32hexchar((b[1] & 1) << 4 | b[2] >> 4);  /* 000YZZZZ */
+        c[4] = base32hexchar((b[2] & 15) << 1 | b[3] >> 7); /* 000ZZZZQ */
+        c[5] = base32hexchar((b[3] & 127) >> 2);            /* 000QQQQQ */
+        c[6] = base32hexchar((b[3] & 3) << 3 | b[4] >> 5);  /* 000QQWWW */
+        c[7] = base32hexchar(b[4] & 31);                    /* 000WWWWW */
+
+        for (i = 0; i < n_chars[n]; i++)
+                *(z++) = c[i];
+
+        if (padding)
+                for (; i < 8; i++)
+                        *(z++) = '=';
+
+        return z;
+}
+
+/* Encodes the l bytes at p as base32hex (RFC 4648, section 7), with or without '=' padding. p may
+ * only be NULL if l is 0. Returns a newly allocated, NUL-terminated string that the caller has to
+ * free(), or NULL if it could not be allocated. */
+char *base32hexmem(const void *p, size_t l, bool padding) {
+        /* Length of the unpadded encoding of an incomplete group, indexed by its number of bytes */
+        static const uint8_t tail_chars[5] = { 0, 2, 4, 5, 7 };
+        const uint8_t *x = p;
+        char *r, *z;
+        size_t len, i;
+
+        assert(p || l == 0);
+
+        /* "8 * (l + 4)" must not wrap around, otherwise the buffer would be too small */
+        if (l > SIZE_MAX / 8 - 4)
+                return NULL;
+
+        if (padding)
+                /* five input bytes makes eight output bytes, padding is added so we must round up */
+                len = 8 * (l + 4) / 5;
+        else
+                /* same, but an incomplete group only takes as many characters as it needs */
+                len = 8 * (l / 5) + tail_chars[l % 5];
+
+        z = r = malloc(len + 1);
+        if (!r)
+                return NULL;
+
+        for (i = 0; i + 5 <= l; i += 5)
+                z = base32hex_encode_group(x + i, 5, padding, z);
+
+        if (i < l)
+                z = base32hex_encode_group(x + i, l - i, padding, z);
+
+        *z = 0;
+        return r;
+}
+
+/* Decodes one group of n base32hex characters at x into out[]. n must be 8 (full group) or one of
+ * the valid lengths of an unpadded final group: 7, 5, 4 or 2. The bits of the last character that
+ * do not belong to any output byte must be zero. Returns the number of bytes written, or -EINVAL
+ * on an invalid character, length or non-zero leftover bits. */
+static int unbase32hex_decode_group(const char *x, size_t n, uint8_t *out) {
+        int v[8] = { 0 }, n_bytes, slack;
+        uint8_t b[5];
+        size_t i;
+
+        switch (n) {
+        case 8:
+                n_bytes = 5;
+                slack = 0;
+                break;
+        case 7:
+                n_bytes = 4;
+                slack = 7;  /* last character == 000VV000 */
+                break;
+        case 5:
+                n_bytes = 3;
+                slack = 1;  /* last character == 000SSSS0 */
+                break;
+        case 4:
+                n_bytes = 2;
+                slack = 15; /* last character == 000W0000 */
+                break;
+        case 2:
+                n_bytes = 1;
+                slack = 3;  /* last character == 000YYY00 */
+                break;
+        default:
+                return -EINVAL;
+        }
+
+        for (i = 0; i < n; i++) {
+                v[i] = unbase32hexchar(x[i]);
+                if (v[i] < 0)
+                        return -EINVAL;
+        }
+
+        if (v[n - 1] & slack)
+                return -EINVAL;
+
+        /* v[0] == 000XXXXX; v[1] == 000YYYYY; v[2] == 000ZZZZZ; v[3] == 000WWWWW
+         * v[4] == 000SSSSS; v[5] == 000QQQQQ; v[6] == 000VVVVV; v[7] == 000RRRRR
+         * Characters missing from a short group are zero and only feed bytes that are not emitted. */
+        b[0] = (uint8_t) (v[0] << 3 | v[1] >> 2);             /* XXXXXYYY */
+        b[1] = (uint8_t) (v[1] << 6 | v[2] << 1 | v[3] >> 4); /* YYZZZZZW */
+        b[2] = (uint8_t) (v[3] << 4 | v[4] >> 1);             /* WWWWSSSS */
+        b[3] = (uint8_t) (v[4] << 7 | v[5] << 2 | v[6] >> 3); /* SQQQQQVV */
+        b[4] = (uint8_t) (v[6] << 5 | v[7]);                  /* VVVRRRRR */
+
+        for (i = 0; i < (size_t) n_bytes; i++)
+                out[i] = b[i];
+
+        return n_bytes;
+}
+
+/* Decodes the base32hex text at p (l characters, or the whole string if l is (size_t) -1) into a
+ * newly allocated buffer. With 'padding' set the input must be padded with '=' to a multiple of
+ * eight characters; without it, '=' is not accepted. On success returns 0, stores the buffer in
+ * *mem (with an extra NUL byte appended) and the number of decoded bytes in *_len. Returns -EINVAL
+ * on malformed input and -ENOMEM if the buffer could not be allocated. */
+int unbase32hexmem(const char *p, size_t l, bool padding, void **mem, size_t *_len) {
+        _cleanup_free_ uint8_t *r = NULL;
+        size_t len, left, n;
+        unsigned pad = 0;
+        const char *x;
+        uint8_t *z;
+        int k;
+
+        assert(p || l == 0);
+        assert(mem);
+        assert(_len);
+
+        if (l == (size_t) -1)
+                l = strlen(p);
+
+        /* padding ensures any base32hex input has input divisible by 8 */
+        if (padding && l % 8 != 0)
+                return -EINVAL;
+
+        if (padding)
+                /* strip the padding */
+                while (l > 0 && p[l - 1] == '=' && pad < 7) {
+                        pad++;
+                        l--;
+                }
+
+        /* a group of eight input bytes needs five output bytes, in case of
+         * padding we need to add some extra bytes */
+        len = (l / 8) * 5;
+
+        switch (l % 8) {
+        case 7:
+                len += 4;
+                break;
+        case 5:
+                len += 3;
+                break;
+        case 4:
+                len += 2;
+                break;
+        case 2:
+                len += 1;
+                break;
+        case 0:
+                break;
+        default:
+                return -EINVAL;
+        }
+
+        z = r = malloc(len + 1);
+        if (!r)
+                return -ENOMEM;
+
+        /* Full groups of eight characters first, then the shorter final group, if any */
+        for (x = p, left = l; left > 0; x += n, left -= n) {
+                n = left < 8 ? left : 8;
+
+                k = unbase32hex_decode_group(x, n, z);
+                if (k < 0)
+                        return k;
+
+                z += k;
+        }
+
+        *z = 0;
+
+        *mem = TAKE_PTR(r);
+        *_len = len;
+
+        return 0;
+}
+
+/* https://tools.ietf.org/html/rfc4648#section-4
+ *
+ * Returns the base64 character for the lowest six bits of x. */
+char base64char(int x) {
+        int v = x & 63;
+
+        if (v < 26)
+                return 'A' + v;
+        if (v < 52)
+                return 'a' + (v - 26);
+        if (v < 62)
+                return '0' + (v - 52);
+
+        return v == 62 ? '+' : '/';
+}
+
+/* Returns the value (0..63) of the base64 character c, or -EINVAL if c is not part of the base64
+ * alphabet (the padding character '=' is not accepted here). */
+int unbase64char(char c) {
+        if (c >= 'A' && c <= 'Z')
+                return c - 'A';
+
+        if (c >= 'a' && c <= 'z')
+                return 26 + (c - 'a');
+
+        if (c >= '0' && c <= '9')
+                return 52 + (c - '0');
+
+        if (c == '+')
+                return 62;
+
+        if (c == '/')
+                return 63;
+
+        return -EINVAL;
+}
+
+/* Encodes the l bytes at p as padded base64 (RFC 4648, section 4). p may only be NULL if l is 0.
+ * On success stores a newly allocated, NUL-terminated string in *out (to be freed by the caller)
+ * and returns its length. Returns -ENOMEM, without touching *out, if the string could not be
+ * allocated or its size cannot be represented. */
+ssize_t base64mem(const void *p, size_t l, char **out) {
+        const uint8_t *x = p;
+        char *r, *z;
+        size_t i;
+
+        assert(p || l == 0);
+        assert(out);
+
+        /* "4 * (l + 2)" must not wrap around, otherwise the buffer would be too small */
+        if (l > SIZE_MAX / 4 - 2)
+                return -ENOMEM;
+
+        /* three input bytes makes four output bytes, padding is added so we must round up */
+        z = r = malloc(4 * (l + 2) / 3 + 1);
+        if (!r)
+                return -ENOMEM;
+
+        for (i = 0; i + 3 <= l; i += 3) {
+                /* x[i] == XXXXXXXX; x[i+1] == YYYYYYYY; x[i+2] == ZZZZZZZZ */
+                *(z++) = base64char(x[i] >> 2);                          /* 00XXXXXX */
+                *(z++) = base64char((x[i] & 3) << 4 | x[i+1] >> 4);      /* 00XXYYYY */
+                *(z++) = base64char((x[i+1] & 15) << 2 | x[i+2] >> 6);   /* 00YYYYZZ */
+                *(z++) = base64char(x[i+2] & 63);                        /* 00ZZZZZZ */
+        }
+
+        /* Encode the one or two bytes left over, if any, and pad to four characters */
+        switch (l - i) {
+        case 2:
+                *(z++) = base64char(x[i] >> 2);                          /* 00XXXXXX */
+                *(z++) = base64char((x[i] & 3) << 4 | x[i+1] >> 4);      /* 00XXYYYY */
+                *(z++) = base64char((x[i+1] & 15) << 2);                 /* 00YYYY00 */
+                *(z++) = '=';
+
+                break;
+        case 1:
+                *(z++) = base64char(x[i] >> 2);                          /* 00XXXXXX */
+                *(z++) = base64char((x[i] & 3) << 4);                    /* 00XX0000 */
+                *(z++) = '=';
+                *(z++) = '=';
+
+                break;
+        }
+
+        *z = 0;
+        *out = r;
+        return z - r;
+}
+/* Base64-encodes the l bytes at p and appends the result to the string *prefix, whose first plen
+ * bytes are kept. 'sep' (may be NULL) is copied right after those plen bytes. The encoded text is
+ * then written in lines of at most 'width' characters, separated by newlines; each line is preceded
+ * by 'indent' spaces, except the first one when sep is NULL. *prefix is reallocated and updated on
+ * success. Returns 0 on success (also if there was nothing to encode, in which case *prefix is left
+ * alone), or a negative errno-style error.
+ * NOTE(review): nothing here rejects width <= 0, which is used as divisor below - confirm that callers guarantee a positive width. */
+static int base64_append_width(
+ char **prefix, int plen,
+ const char *sep, int indent,
+ const void *p, size_t l,
+ int width) {
+
+ _cleanup_free_ char *x = NULL;
+ char *t, *s;
+ ssize_t len, slen, avail, line, lines;
+
+ len = base64mem(p, l, &x);
+ if (len <= 0)
+ return len;
+
+ lines = DIV_ROUND_UP(len, width);
+
+ slen = strlen_ptr(sep);
+ if (lines > (SSIZE_MAX - plen - 1 - slen) / (indent + width + 1)) /* the size computed for realloc() below must not overflow */
+ return -ENOMEM;
+
+ t = realloc(*prefix, plen + 1 + slen + (indent + width + 1) * lines);
+ if (!t)
+ return -ENOMEM;
+
+ memcpy_safe(t + plen, sep, slen);
+
+ for (line = 0, s = t + plen + slen, avail = len; line < lines; line++) {
+ int act = MIN(width, avail);
+
+ if (line > 0 || sep) {
+ memset(s, ' ', indent);
+ s += indent;
+ }
+
+ memcpy(s, x + width * line, act);
+ s += act;
+ *(s++) = line < lines - 1 ? '\n' : '\0'; /* newline between lines, NUL after the last one */
+ avail -= act;
+ }
+ assert(avail == 0);
+
+ *prefix = t;
+ return 0;
+}
+
+/* Appends the base64 encoding of the l bytes at p to the string *prefix (of length plen), wrapped
+ * for an output that is 'width' columns wide. If the prefix is short enough, the encoded text
+ * starts on the same line and continuation lines are aligned below it; otherwise it starts on a new
+ * line and all lines are indented by 'indent'. Returns the result of base64_append_width(). */
+int base64_append(
+                char **prefix, int plen,
+                const void *p, size_t l,
+                int indent, int width) {
+
+        bool fits_after_prefix = plen <= width / 2 && plen + indent <= width;
+
+        if (fits_after_prefix)
+                /* leave plen on the left, keep last column free */
+                return base64_append_width(prefix, plen, NULL, plen, p, l, width - plen - 1);
+
+        /* leave indent on the left, keep last column free */
+        return base64_append_width(prefix, plen, "\n", indent, p, l, width - indent - 1);
+}
+/* Consumes one base64 character from the buffer described by *p (current position) and *l (bytes
+ * left), advancing both. Returns the character's value, INT_MAX for the padding character '=',
+ * -EPIPE if the buffer holds nothing but whitespace (or is empty), or the negative error from
+ * unbase64char() for any other character. */
+static int unbase64_next(const char **p, size_t *l) {
+ int ret;
+
+ assert(p);
+ assert(l);
+
+ /* Find the next non-whitespace character, and decode it. If we find padding, we return it as INT_MAX. We
+ * greedily skip all preceding and all following whitespace. */
+
+ for (;;) {
+ if (*l == 0)
+ return -EPIPE;
+
+ if (!strchr(WHITESPACE, **p))
+ break;
+
+ /* Skip leading whitespace */
+ (*p)++, (*l)--;
+ }
+ /* *p now points at a character that is not in WHITESPACE */
+ if (**p == '=')
+ ret = INT_MAX; /* return padding as INT_MAX */
+ else {
+ ret = unbase64char(**p);
+ if (ret < 0)
+ return ret;
+ }
+ /* Step over the character, then over any whitespace directly following it */
+ for (;;) {
+ (*p)++, (*l)--;
+
+ if (*l == 0)
+ break;
+ if (!strchr(WHITESPACE, **p))
+ break;
+
+ /* Skip following whitespace */
+ }
+
+ return ret;
+}
+/* Decodes the base64 text at p (l characters, or the whole string if l is (size_t) -1) into a
+ * newly allocated buffer. Whitespace between characters is ignored. On success returns 0, stores
+ * the buffer in *ret (with an extra NUL byte appended) and the number of decoded bytes in
+ * *ret_size. Returns -ENAMETOOLONG if anything follows the padding, -ENOMEM if the buffer could
+ * not be allocated, and the negative error from unbase64_next() or -EINVAL for other malformed
+ * input; the outputs are left untouched in all error cases. */
+int unbase64mem(const char *p, size_t l, void **ret, size_t *ret_size) {
+ _cleanup_free_ uint8_t *buf = NULL;
+ const char *x;
+ uint8_t *z;
+ size_t len;
+
+ assert(p || l == 0);
+ assert(ret);
+ assert(ret_size);
+
+ if (l == (size_t) -1)
+ l = strlen(p);
+
+ /* A group of four input bytes needs three output bytes, in case of padding we need to add two or three extra
+ * bytes. Note that this calculation is an upper boundary, as we ignore whitespace while decoding */
+ len = (l / 4) * 3 + (l % 4 != 0 ? (l % 4) - 1 : 0);
+
+ buf = malloc(len + 1);
+ if (!buf)
+ return -ENOMEM;
+
+ for (x = p, z = buf;;) {
+ int a, b, c, d; /* a == 00XXXXXX; b == 00YYYYYY; c == 00ZZZZZZ; d == 00WWWWWW */
+
+ a = unbase64_next(&x, &l);
+ if (a == -EPIPE) /* End of string */
+ break;
+ if (a < 0)
+ return a;
+ if (a == INT_MAX) /* Padding is not allowed at the beginning of a 4ch block */
+ return -EINVAL;
+
+ b = unbase64_next(&x, &l);
+ if (b < 0)
+ return b;
+ if (b == INT_MAX) /* Padding is not allowed at the second character of a 4ch block either */
+ return -EINVAL;
+
+ c = unbase64_next(&x, &l);
+ if (c < 0)
+ return c;
+
+ d = unbase64_next(&x, &l);
+ if (d < 0)
+ return d;
+
+ if (c == INT_MAX) { /* Padding at the third character */
+
+ if (d != INT_MAX) /* If the third character is padding, the fourth must be too */
+ return -EINVAL;
+
+ /* b == 00YY0000 */
+ if (b & 15)
+ return -EINVAL;
+
+ if (l > 0) /* Trailing rubbish? */
+ return -ENAMETOOLONG;
+
+ *(z++) = (uint8_t) a << 2 | (uint8_t) (b >> 4); /* XXXXXXYY */
+ break;
+ }
+ /* Padding at the fourth character only: the block carries two bytes */
+ if (d == INT_MAX) {
+ /* c == 00ZZZZ00 */
+ if (c & 3)
+ return -EINVAL;
+
+ if (l > 0) /* Trailing rubbish? */
+ return -ENAMETOOLONG;
+
+ *(z++) = (uint8_t) a << 2 | (uint8_t) b >> 4; /* XXXXXXYY */
+ *(z++) = (uint8_t) b << 4 | (uint8_t) c >> 2; /* YYYYZZZZ */
+ break;
+ }
+ /* No padding: a full block carrying three bytes */
+ *(z++) = (uint8_t) a << 2 | (uint8_t) b >> 4; /* XXXXXXYY */
+ *(z++) = (uint8_t) b << 4 | (uint8_t) c >> 2; /* YYYYZZZZ */
+ *(z++) = (uint8_t) c << 6 | (uint8_t) d; /* ZZWWWWWW */
+ }
+
+ *z = 0;
+
+ *ret_size = (size_t) (z - buf);
+ *ret = TAKE_PTR(buf);
+
+ return 0;
+}
+/* Writes a hex dump of the s bytes at p to f, or to stdout if f is NULL. Each output line covers
+ * up to 16 bytes: the offset in hex, the bytes in hex, then the same bytes as characters with
+ * non-printable ones shown as '.'. Nothing is written if s is 0. */
+void hexdump(FILE *f, const void *p, size_t s) {
+ const uint8_t *b = p;
+ unsigned n = 0;
+
+ assert(b || s == 0);
+
+ if (!f)
+ f = stdout;
+
+ while (s > 0) {
+ size_t i;
+
+ fprintf(f, "%04x ", n);
+ /* Hex column; positions past the end of the data are filled with blanks */
+ for (i = 0; i < 16; i++) {
+
+ if (i >= s)
+ fputs(" ", f);
+ else
+ fprintf(f, "%02x ", b[i]);
+
+ if (i == 7)
+ fputc(' ', f);
+ }
+
+ fputc(' ', f);
+ /* Character column */
+ for (i = 0; i < 16; i++) {
+
+ if (i >= s)
+ fputc(' ', f);
+ else
+ fputc(isprint(b[i]) ? (char) b[i] : '.', f);
+ }
+
+ fputc('\n', f);
+ /* A short line was the last one */
+ if (s < 16)
+ break;
+
+ n += 16;
+ b += 16;
+ s -= 16;
+ }
+}
diff --git a/src/basic/hexdecoct.h b/src/basic/hexdecoct.h
new file mode 100644
index 0000000..9477d16
--- /dev/null
+++ b/src/basic/hexdecoct.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+/* Single-character digit helpers for octal, decimal and hexadecimal. The definitions are not part of
+ * this header; presumably the un*() variants report an invalid character through a negative return
+ * value — confirm in hexdecoct.c. */
+char octchar(int x) _const_;
+int unoctchar(char c) _const_;
+
+char decchar(int x) _const_;
+int undecchar(char c) _const_;
+
+char hexchar(int x) _const_;
+int unhexchar(char c) _const_;
+
+/* Whole-buffer hex encoding/decoding. NOTE(review): ownership of the returned buffers is not visible
+ * from the prototypes — verify against the implementation. */
+char *hexmem(const void *p, size_t l);
+int unhexmem(const char *p, size_t l, void **mem, size_t *len);
+
+char base32hexchar(int x) _const_;
+int unbase32hexchar(char c) _const_;
+
+char base64char(int x) _const_;
+int unbase64char(char c) _const_;
+
+char *base32hexmem(const void *p, size_t l, bool padding);
+int unbase32hexmem(const char *p, size_t l, bool padding, void **mem, size_t *len);
+
+ssize_t base64mem(const void *p, size_t l, char **out);
+int base64_append(char **prefix, int plen,
+ const void *p, size_t l,
+ int margin, int width);
+int unbase64mem(const char *p, size_t l, void **mem, size_t *len);
+
+/* Dumps the s bytes at p to f as rows of hex values plus their character form; a NULL f selects stdout. */
+void hexdump(FILE *f, const void *p, size_t s);
diff --git a/src/basic/hostname-util.c b/src/basic/hostname-util.c
new file mode 100644
index 0000000..5bfa028
--- /dev/null
+++ b/src/basic/hostname-util.c
@@ -0,0 +1,308 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "macro.h"
+#include "string-util.h"
+
+/* Tells whether uname() reports a real node name, i.e. one that is neither empty nor "(none)". */
+bool hostname_is_set(void) {
+        struct utsname uts;
+
+        assert_se(uname(&uts) >= 0);
+
+        /* "(none)" is the built-in kernel default host name */
+        return !isempty(uts.nodename) && !streq(uts.nodename, "(none)");
+}
+
+/* Returns a newly allocated copy of the node name reported by uname(), or of FALLBACK_HOSTNAME if that
+ * name is empty or the "(none)" placeholder. The result may well be "localhost" if that is what is set.
+ * Returns NULL only if strdup() fails. */
+char* gethostname_malloc(void) {
+        struct utsname uts;
+        const char *name;
+
+        assert_se(uname(&uts) >= 0);
+
+        name = uts.nodename;
+        if (isempty(name) || streq(name, "(none)"))
+                name = FALLBACK_HOSTNAME;
+
+        return strdup(name);
+}
+
+/* Strict counterpart of gethostname_malloc(): never invents a name. Stores a newly allocated copy of
+ * the node name in *ret and returns 0. Returns -ENXIO if the name is empty, "(none)" or recognized by
+ * is_localhost(), and -ENOMEM if the copy cannot be allocated. */
+int gethostname_strict(char **ret) {
+        struct utsname uts;
+        char *copy;
+
+        assert_se(uname(&uts) >= 0);
+
+        if (isempty(uts.nodename) ||
+            streq(uts.nodename, "(none)") ||
+            is_localhost(uts.nodename))
+                return -ENXIO;
+
+        copy = strdup(uts.nodename);
+        if (!copy)
+                return -ENOMEM;
+
+        *ret = copy;
+        return 0;
+}
+
+/* True for the "LDH" characters: ASCII letters, ASCII digits and the hyphen. */
+bool valid_ldh_char(char c) {
+        if (c == '-')
+                return true;
+        if (c >= '0' && c <= '9')
+                return true;
+        if (c >= 'A' && c <= 'Z')
+                return true;
+
+        return c >= 'a' && c <= 'z';
+}
+
+/**
+ * Decides whether s is acceptable as a host name or FQDN. This is not a
+ * full DNS validation (c.f. dns_name_is_valid()) and is substantially more
+ * conservative than that call: only the character set, the placement of
+ * dots and hyphens, and the total length (at most HOST_NAME_MAX) are
+ * checked. Empty names, leading dots, consecutive dots, and labels that
+ * begin or end with a hyphen are refused. A trailing dot is accepted only
+ * if allow_trailing_dot is true and the name contains at least two dots,
+ * the trailing one included.
+ */
+bool hostname_is_valid(const char *s, bool allow_trailing_dot) {
+        /* At the very start both a dot and a hyphen are refused, exactly as after a dot */
+        bool after_dot = true, after_hyphen = true;
+        unsigned dots = 0;
+        const char *c;
+
+        if (isempty(s))
+                return false;
+
+        for (c = s; *c; c++)
+                switch (*c) {
+
+                case '.':
+                        /* No leading dot, no empty label, no label ending in a hyphen */
+                        if (after_dot || after_hyphen)
+                                return false;
+
+                        after_dot = true;
+                        after_hyphen = false;
+                        dots++;
+                        break;
+
+                case '-':
+                        /* No label starting with a hyphen */
+                        if (after_dot)
+                                return false;
+
+                        after_dot = false;
+                        after_hyphen = true;
+                        break;
+
+                default:
+                        if (!valid_ldh_char(*c))
+                                return false;
+
+                        after_dot = after_hyphen = false;
+                        break;
+                }
+
+        if (after_dot && (dots < 2 || !allow_trailing_dot))
+                return false;
+        if (after_hyphen)
+                return false;
+
+        /* Note that HOST_NAME_MAX is 64 on Linux, but DNS allows domain names up to 255 characters */
+        return c - s <= HOST_NAME_MAX;
+}
+
+/* Sanitizes the host name in s in place and returns s. Characters that are neither valid_ldh_char()
+ * nor '.' are dropped, as are dots and hyphens in positions where hostname_is_valid() would refuse them
+ * (at the start, a dot after a dot or hyphen, a hyphen after a dot). At most HOST_NAME_MAX characters
+ * are kept, and the result never ends in a dot or hyphen. The result may be the empty string. */
+char* hostname_cleanup(char *s) {
+        char *p, *d;
+        bool dot, hyphen;
+
+        assert(s);
+
+        /* p reads, d writes; d never overtakes p, hence filtering in place is safe */
+        for (p = s, d = s, dot = hyphen = true; *p && d - s < HOST_NAME_MAX; p++)
+                if (*p == '.') {
+                        if (dot || hyphen)
+                                continue;
+
+                        *(d++) = '.';
+                        dot = true;
+                        hyphen = false;
+
+                } else if (*p == '-') {
+                        if (dot)
+                                continue;
+
+                        *(d++) = '-';
+                        dot = false;
+                        hyphen = true;
+
+                } else if (valid_ldh_char(*p)) {
+                        *(d++) = *p;
+                        dot = false;
+                        hyphen = false;
+                }
+
+        /* The dot can occur at most once at the end, but consecutive hyphens are kept by the loop above,
+         * so there might be several of them: strip them all, not just the last one. */
+        while (d > s && IN_SET(d[-1], '-', '.'))
+                d--;
+        *d = 0;
+
+        return s;
+}
+
+/* Recognizes the local host and domain names described in RFC6761, plus the redhatism of localdomain:
+ * "localhost" and "localhost.localdomain", each with or without trailing dot, and any name ending in
+ * one of these as a sub-domain. Matching ignores case. */
+bool is_localhost(const char *hostname) {
+        static const char * const exact[] = {
+                "localhost",
+                "localhost.",
+                "localhost.localdomain",
+                "localhost.localdomain.",
+        };
+        static const char * const suffix[] = {
+                ".localhost",
+                ".localhost.",
+                ".localhost.localdomain",
+                ".localhost.localdomain.",
+        };
+        size_t i;
+
+        assert(hostname);
+
+        for (i = 0; i < sizeof(exact) / sizeof(exact[0]); i++)
+                if (strcaseeq(hostname, exact[i]))
+                        return true;
+
+        for (i = 0; i < sizeof(suffix) / sizeof(suffix[0]); i++)
+                if (endswith_no_case(hostname, suffix[i]))
+                        return true;
+
+        return false;
+}
+
+/* Returns true if hostname names the synthetic "gateway" host: "_gateway", optionally with a trailing
+ * dot, compared case-insensitively. */
+bool is_gateway_hostname(const char *hostname) {
+ assert(hostname);
+
+ /* This tries to identify the valid syntaxes for our
+ * synthetic "gateway" host. */
+
+ return
+ strcaseeq(hostname, "_gateway") || strcaseeq(hostname, "_gateway.")
+#if ENABLE_COMPAT_GATEWAY_HOSTNAME
+ /* With the compat option enabled the name without the underscore is accepted, too */
+ || strcaseeq(hostname, "gateway") || strcaseeq(hostname, "gateway.")
+#endif
+ ;
+}
+
+/* Sets the host name to s unless it already is s. Returns 1 if the name was changed, 0 if it already
+ * matched, and a negative errno if gethostname() or sethostname() fails. */
+int sethostname_idempotent(const char *s) {
+        char current[HOST_NAME_MAX + 1] = {};
+
+        assert(s);
+
+        if (gethostname(current, sizeof(current)) < 0)
+                return -errno;
+
+        if (!streq(current, s)) {
+                if (sethostname(s, strlen(s)) < 0)
+                        return -errno;
+
+                return 1;
+        }
+
+        return 0;
+}
+
+/* Stores in *ret a newly allocated copy of s that passes hostname_is_valid(). If s is valid already it
+ * is copied verbatim and 0 is returned. Otherwise the copy is cut at the first dot and shortened to
+ * HOST_NAME_MAX, whatever comes earlier, and 1 is returned. Returns -EDOM if even the shortened name is
+ * not valid, -ENOMEM on allocation failure. */
+int shorten_overlong(const char *s, char **ret) {
+        char *copy, *first_dot;
+        int shortened = 0;
+
+        assert(s);
+
+        copy = strdup(s);
+        if (!copy)
+                return -ENOMEM;
+
+        if (!hostname_is_valid(copy, false)) {
+                first_dot = strchr(copy, '.');
+                if (first_dot)
+                        *first_dot = 0;
+
+                strshorten(copy, HOST_NAME_MAX);
+
+                if (!hostname_is_valid(copy, false)) {
+                        free(copy);
+                        return -EDOM;
+                }
+
+                shortened = 1;
+        }
+
+        *ret = copy;
+        return shortened;
+}
+
+/* Reads a host name from the stream f: the first line that, after stripping, is neither empty nor a
+ * '#' comment. The name is normalized with hostname_cleanup() and stored, newly allocated, in *ret.
+ * Returns 0 on success, -ENOENT if the stream ends without such a line, -EBADMSG if the normalized
+ * name is not valid, -ENOMEM on allocation failure, or the error returned by read_line(). */
+int read_etc_hostname_stream(FILE *f, char **ret) {
+        assert(f);
+        assert(ret);
+
+        for (;;) {
+                _cleanup_free_ char *line = NULL;
+                char *name, *copy;
+                int k;
+
+                k = read_line(f, LONG_LINE_MAX, &line);
+                if (k < 0)
+                        return k;
+                if (k == 0) /* EOF without any hostname? the file is empty, let's treat that exactly like no file at all: ENOENT */
+                        return -ENOENT;
+
+                name = strstrip(line);
+
+                /* File may have empty lines or comments, ignore them */
+                if (IN_SET(*name, '\0', '#'))
+                        continue;
+
+                hostname_cleanup(name); /* normalize the hostname */
+
+                if (!hostname_is_valid(name, true)) /* check that the hostname we return is valid */
+                        return -EBADMSG;
+
+                copy = strdup(name);
+                if (!copy)
+                        return -ENOMEM;
+
+                *ret = copy;
+                return 0;
+        }
+}
+
+/* Opens path (or "/etc/hostname" if path is NULL) and reads the host name from it via
+ * read_etc_hostname_stream(). Returns a negative errno if the file cannot be opened, otherwise whatever
+ * read_etc_hostname_stream() returns. */
+int read_etc_hostname(const char *path, char **ret) {
+        _cleanup_fclose_ FILE *f = NULL;
+
+        assert(ret);
+
+        f = fopen(path ?: "/etc/hostname", "re");
+        if (!f)
+                return -errno;
+
+        return read_etc_hostname_stream(f, ret);
+}
diff --git a/src/basic/hostname-util.h b/src/basic/hostname-util.h
new file mode 100644
index 0000000..7ba386a
--- /dev/null
+++ b/src/basic/hostname-util.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "macro.h"
+
+/* True if the kernel node name is neither empty nor the "(none)" default. */
+bool hostname_is_set(void);
+
+/* Both return a newly allocated string. The _malloc variant falls back to a made-up name, the _strict
+ * variant fails with -ENXIO rather than returning an unset or localhost name. */
+char* gethostname_malloc(void);
+int gethostname_strict(char **ret);
+
+bool valid_ldh_char(char c) _const_;
+bool hostname_is_valid(const char *s, bool allow_trailing_dot) _pure_;
+/* Sanitizes s in place and returns s. */
+char* hostname_cleanup(char *s);
+
+/* Machine names follow the host name rules, without trailing dot. */
+#define machine_name_is_valid(s) hostname_is_valid(s, false)
+
+bool is_localhost(const char *hostname);
+bool is_gateway_hostname(const char *hostname);
+
+/* Returns 1 if the host name was changed, 0 if it was already set to s, negative errno on failure. */
+int sethostname_idempotent(const char *s);
+
+/* Returns 0 if s was valid as is, 1 if it had to be shortened, negative errno on failure. */
+int shorten_overlong(const char *s, char **ret);
+
+/* A NULL path selects "/etc/hostname". */
+int read_etc_hostname_stream(FILE *f, char **ret);
+int read_etc_hostname(const char *path, char **ret);
diff --git a/src/basic/in-addr-util.c b/src/basic/in-addr-util.c
new file mode 100644
index 0000000..2bffe47
--- /dev/null
+++ b/src/basic/in-addr-util.c
@@ -0,0 +1,622 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <endian.h>
+#include <errno.h>
+#include <net/if.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "util.h"
+
+/* True if the IPv4 address is all zeroes (0.0.0.0). */
+bool in4_addr_is_null(const struct in_addr *a) {
+        assert(a);
+
+        return !a->s_addr;
+}
+
+/* Family-generic null check: returns non-zero if u is the unspecified address of the given family,
+ * 0 if not, and -EAFNOSUPPORT for families other than AF_INET and AF_INET6. */
+int in_addr_is_null(int family, const union in_addr_union *u) {
+        assert(u);
+
+        switch (family) {
+
+        case AF_INET:
+                return in4_addr_is_null(&u->in);
+
+        case AF_INET6:
+                return IN6_IS_ADDR_UNSPECIFIED(&u->in6);
+
+        default:
+                return -EAFNOSUPPORT;
+        }
+}
+
+/* True if the IPv4 address lies in 169.254.0.0/16. */
+bool in4_addr_is_link_local(const struct in_addr *a) {
+        assert(a);
+
+        /* Compare the upper two octets, in host byte order, against 169.254 */
+        return (be32toh(a->s_addr) >> 16) == ((UINT32_C(169) << 8) | UINT32_C(254));
+}
+
+/* Family-generic link-local check: returns non-zero if u is a link-local address of the given family,
+ * 0 if not, and -EAFNOSUPPORT for families other than AF_INET and AF_INET6. */
+int in_addr_is_link_local(int family, const union in_addr_union *u) {
+        assert(u);
+
+        switch (family) {
+
+        case AF_INET:
+                return in4_addr_is_link_local(&u->in);
+
+        case AF_INET6:
+                return IN6_IS_ADDR_LINKLOCAL(&u->in6);
+
+        default:
+                return -EAFNOSUPPORT;
+        }
+}
+
+/* Family-generic multicast check: returns non-zero if u is a multicast address of the given family,
+ * 0 if not, and -EAFNOSUPPORT for families other than AF_INET and AF_INET6. */
+int in_addr_is_multicast(int family, const union in_addr_union *u) {
+        assert(u);
+
+        switch (family) {
+
+        case AF_INET:
+                /* IN_MULTICAST() takes the address in host byte order */
+                return IN_MULTICAST(be32toh(u->in.s_addr));
+
+        case AF_INET6:
+                return IN6_IS_ADDR_MULTICAST(&u->in6);
+
+        default:
+                return -EAFNOSUPPORT;
+        }
+}
+
+/* True if the IPv4 address lies in 127.0.0.0/8. */
+bool in4_addr_is_localhost(const struct in_addr *a) {
+        assert(a);
+
+        /* All of 127.x.x.x is localhost, hence only the first octet matters. */
+        return (be32toh(a->s_addr) >> 24) == UINT32_C(127);
+}
+
+/* Family-generic loopback check: returns non-zero if u is a loopback address of the given family,
+ * 0 if not, and -EAFNOSUPPORT for families other than AF_INET and AF_INET6. */
+int in_addr_is_localhost(int family, const union in_addr_union *u) {
+        assert(u);
+
+        switch (family) {
+
+        case AF_INET:
+                return in4_addr_is_localhost(&u->in);
+
+        case AF_INET6:
+                return IN6_IS_ADDR_LOOPBACK(&u->in6);
+
+        default:
+                return -EAFNOSUPPORT;
+        }
+}
+
+/* Compares two addresses of the given family. Returns 1 if they are equal, 0 if not, and
+ * -EAFNOSUPPORT for families other than AF_INET and AF_INET6. */
+int in_addr_equal(int family, const union in_addr_union *a, const union in_addr_union *b) {
+        unsigned i;
+
+        assert(a);
+        assert(b);
+
+        switch (family) {
+
+        case AF_INET:
+                return a->in.s_addr == b->in.s_addr;
+
+        case AF_INET6:
+                /* Compare the four 32bit words of the address one by one */
+                for (i = 0; i < 4; i++)
+                        if (a->in6.s6_addr32[i] != b->in6.s6_addr32[i])
+                                return 0;
+
+                return 1;
+
+        default:
+                return -EAFNOSUPPORT;
+        }
+}
+
+/* Checks whether the networks a/aprefixlen and b/bprefixlen have any address in common, which is the
+ * case if both addresses agree on the first MIN(aprefixlen, bprefixlen) bits. Prefix lengths beyond the
+ * address size of the family are treated like the address size. Returns 1 if the networks intersect,
+ * 0 if not, and -EAFNOSUPPORT for families other than AF_INET and AF_INET6. */
+int in_addr_prefix_intersect(
+                int family,
+                const union in_addr_union *a,
+                unsigned aprefixlen,
+                const union in_addr_union *b,
+                unsigned bprefixlen) {
+
+        unsigned m;
+
+        assert(a);
+        assert(b);
+
+        /* Checks whether there are any addresses that are in both
+         * networks */
+
+        m = MIN(aprefixlen, bprefixlen);
+
+        if (family == AF_INET) {
+                uint32_t x, nm;
+
+                /* Without clamping, 32 - m would wrap around for m > 32 and the shift below would be
+                 * undefined */
+                if (m > 32)
+                        m = 32;
+
+                x = be32toh(a->in.s_addr ^ b->in.s_addr);
+                /* m == 0 is special-cased, as it would require a shift by the full width */
+                nm = (m == 0) ? 0 : 0xFFFFFFFFUL << (32 - m);
+
+                return (x & nm) == 0;
+        }
+
+        if (family == AF_INET6) {
+                unsigned i;
+
+                if (m > 128)
+                        m = 128;
+
+                /* Walk the address byte by byte, m holds the number of prefix bits still to compare */
+                for (i = 0; i < 16; i++) {
+                        uint8_t x, nm;
+
+                        x = a->in6.s6_addr[i] ^ b->in6.s6_addr[i];
+
+                        if (m < 8)
+                                nm = 0xFF << (8 - m);
+                        else
+                                nm = 0xFF;
+
+                        if ((x & nm) != 0)
+                                return 0;
+
+                        if (m > 8)
+                                m -= 8;
+                        else
+                                m = 0;
+                }
+
+                return 1;
+        }
+
+        return -EAFNOSUPPORT;
+}
+
+/* Advances *u in place to the next network of size prefixlen. Returns 1 on success, 0 if prefixlen is 0
+ * or the addition overflows (in which case *u is left unmodified), and -EAFNOSUPPORT for families other
+ * than AF_INET and AF_INET6. Prefix lengths beyond the address size are clamped to it. */
+int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen) {
+ assert(u);
+
+ /* Increases the network part of an address by one. Returns
+ * positive if that succeeds, or 0 if this overflows. */
+
+ if (prefixlen <= 0)
+ return 0;
+
+ if (family == AF_INET) {
+ uint32_t c, n;
+
+ if (prefixlen > 32)
+ prefixlen = 32;
+
+ c = be32toh(u->in.s_addr);
+ /* Add one unit of the least significant network bit */
+ n = c + (1UL << (32 - prefixlen));
+ /* A result smaller than the input means the 32bit value wrapped around */
+ if (n < c)
+ return 0;
+ /* Clear the host part */
+ n &= 0xFFFFFFFFUL << (32 - prefixlen);
+
+ u->in.s_addr = htobe32(n);
+ return 1;
+ }
+
+ if (family == AF_INET6) {
+ struct in6_addr add = {}, result;
+ uint8_t overflow = 0;
+ unsigned i;
+
+ if (prefixlen > 128)
+ prefixlen = 128;
+
+ /* First calculate what we have to add */
+ add.s6_addr[(prefixlen-1) / 8] = 1 << (7 - (prefixlen-1) % 8);
+
+ /* Byte-wise addition from the last byte to the first, carrying into the next byte. Unlike in
+ * the IPv4 case the host part is not cleared here. */
+ for (i = 16; i > 0; i--) {
+ unsigned j = i - 1;
+
+ result.s6_addr[j] = u->in6.s6_addr[j] + add.s6_addr[j] + overflow;
+ overflow = (result.s6_addr[j] < u->in6.s6_addr[j]);
+ }
+
+ /* A carry out of the first byte means the whole address wrapped around */
+ if (overflow)
+ return 0;
+
+ u->in6 = result;
+ return 1;
+ }
+
+ return -EAFNOSUPPORT;
+}
+
+/* Formats the address u of the given family with inet_ntop() and stores the newly allocated string in
+ * *ret. Returns 0 on success, -EAFNOSUPPORT for families other than AF_INET and AF_INET6, -ENOMEM on
+ * allocation failure, or the negative errno of inet_ntop() (-EINVAL if it left errno unset). */
+int in_addr_to_string(int family, const union in_addr_union *u, char **ret) {
+        size_t sz;
+        char *buf;
+
+        assert(u);
+        assert(ret);
+
+        switch (family) {
+
+        case AF_INET:
+                sz = INET_ADDRSTRLEN;
+                break;
+
+        case AF_INET6:
+                sz = INET6_ADDRSTRLEN;
+                break;
+
+        default:
+                return -EAFNOSUPPORT;
+        }
+
+        buf = new(char, sz);
+        if (!buf)
+                return -ENOMEM;
+
+        errno = 0;
+        if (!inet_ntop(family, u, buf, sz)) {
+                free(buf);
+                return errno > 0 ? -errno : -EINVAL;
+        }
+
+        *ret = buf;
+        return 0;
+}
+
+/* Much like in_addr_to_string(), but for IPv6 link-local addresses with a positive ifindex the zone
+ * interface index is appended as "%<ifindex>". In all other cases this is plain in_addr_to_string().
+ * Returns 0 on success, a negative errno on failure. */
+int in_addr_ifindex_to_string(int family, const union in_addr_union *u, int ifindex, char **ret) {
+        size_t sz;
+        char *buf;
+        int r;
+
+        assert(u);
+        assert(ret);
+
+        /* A zone suffix only makes sense for IPv6 and with a valid interface index */
+        if (family != AF_INET6 || ifindex <= 0)
+                return in_addr_to_string(family, u, ret);
+
+        r = in_addr_is_link_local(family, u);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return in_addr_to_string(family, u, ret);
+
+        /* Room for the address, the '%' separator, the index and the trailing NUL */
+        sz = INET6_ADDRSTRLEN + 1 + DECIMAL_STR_MAX(ifindex) + 1;
+        buf = new(char, sz);
+        if (!buf)
+                return -ENOMEM;
+
+        errno = 0;
+        if (!inet_ntop(family, u, buf, sz)) {
+                free(buf);
+                return errno > 0 ? -errno : -EINVAL;
+        }
+
+        sprintf(strchr(buf, 0), "%%%i", ifindex);
+        *ret = buf;
+
+        return 0;
+}
+
+/* Parses s as an address of the given family with inet_pton(). The result is stored in *ret; ret may
+ * be NULL, in which case s is only validated. Returns 0 on success, -EAFNOSUPPORT for families other
+ * than AF_INET and AF_INET6, otherwise the negative errno of inet_pton() (-EINVAL if errno is unset). */
+int in_addr_from_string(int family, const char *s, union in_addr_union *ret) {
+        union in_addr_union scratch;
+        union in_addr_union *dest;
+
+        assert(s);
+
+        if (family != AF_INET && family != AF_INET6)
+                return -EAFNOSUPPORT;
+
+        /* Parse into a throw-away buffer if the caller does not want the result */
+        dest = ret ? ret : &scratch;
+
+        errno = 0;
+        if (inet_pton(family, s, dest) > 0)
+                return 0;
+
+        return errno > 0 ? -errno : -EINVAL;
+}
+
+/* Parses s as an IPv4 address first and as an IPv6 address second. On success returns 0, stores the
+ * address in *ret and, if ret_family is not NULL, the detected family in *ret_family. Returns -EINVAL
+ * if s parses as neither. */
+int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret) {
+        static const int families[] = { AF_INET, AF_INET6 };
+        size_t i;
+
+        assert(s);
+
+        for (i = 0; i < sizeof(families) / sizeof(families[0]); i++) {
+                if (in_addr_from_string(families[i], s, ret) < 0)
+                        continue;
+
+                if (ret_family)
+                        *ret_family = families[i];
+
+                return 0;
+        }
+
+        return -EINVAL;
+}
+
+/* Parses "ADDRESS" or "ADDRESS%ZONE". Stores the address family in *family, the address in *ret and,
+ * if ifindex is not NULL, the interface index of the zone in *ifindex (0 if there is no zone suffix).
+ * Returns 0 on success, a negative errno on failure. */
+int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex) {
+ _cleanup_free_ char *buf = NULL;
+ const char *suffix;
+ int r, ifi = 0;
+
+ assert(s);
+ assert(family);
+ assert(ret);
+
+ /* Similar to in_addr_from_string_auto() but also parses an optionally appended IPv6 zone suffix ("scope id")
+ * if one is found. */
+
+ suffix = strchr(s, '%');
+ if (suffix) {
+
+ /* If the caller passed no ifindex the suffix is cut off below without being looked at */
+ if (ifindex) {
+ /* If we shall return the interface index, try to parse it */
+ r = parse_ifindex(suffix + 1, &ifi);
+ if (r < 0) {
+ unsigned u;
+
+ /* Not accepted by parse_ifindex(), hence try it as an interface name */
+ u = if_nametoindex(suffix + 1);
+ if (u <= 0)
+ return -errno;
+
+ ifi = (int) u;
+ }
+ }
+
+ /* Continue with a copy of the part before the '%' only */
+ buf = strndup(s, suffix - s);
+ if (!buf)
+ return -ENOMEM;
+
+ s = buf;
+ }
+
+ r = in_addr_from_string_auto(s, family, ret);
+ if (r < 0)
+ return r;
+
+ if (ifindex)
+ *ifindex = ifi;
+
+ return r;
+}
+
+/* Converts an IPv4 netmask into a prefix length: 32 minus the number of trailing zero bits of the
+ * mask in host byte order, as counted by u32ctz(). */
+unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr) {
+        uint32_t mask;
+
+        assert(addr);
+
+        mask = be32toh(addr->s_addr);
+
+        return 32U - u32ctz(mask);
+}
+
+/* Stores the IPv4 netmask for the given prefix length (at most 32) in *addr, in network byte order,
+ * and returns addr. */
+struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) {
+        assert(addr);
+        assert(prefixlen <= 32);
+
+        /* A shift by the full width of the type is not defined, hence prefix length 0 is handled
+         * specially. */
+        addr->s_addr = prefixlen == 0 ? 0 : htobe32((0xffffffff << (32 - prefixlen)) & 0xffffffff);
+
+        return addr;
+}
+
+/* Derives the classful default prefix length from the first octet of an IPv4 address: 8 for class A
+ * (below 128), 16 for class B (below 192), 24 for class C (below 224). Stores the result in *prefixlen
+ * and returns 0, or returns -ERANGE for class D and E addresses, which have no default. */
+int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) {
+        uint8_t msb_octet;
+
+        assert(addr);
+        assert(prefixlen);
+
+        /* addr may not be aligned, so make sure we only access it byte-wise. Also, only look at it after
+         * the pointer has been checked above. */
+        msb_octet = *(const uint8_t*) addr;
+
+        if (msb_octet < 128)
+                /* class A, leading bits: 0 */
+                *prefixlen = 8;
+        else if (msb_octet < 192)
+                /* class B, leading bits 10 */
+                *prefixlen = 16;
+        else if (msb_octet < 224)
+                /* class C, leading bits 110 */
+                *prefixlen = 24;
+        else
+                /* class D or E, no default prefixlen */
+                return -ERANGE;
+
+        return 0;
+}
+
+/* Stores the classful default netmask of addr in *mask. Returns 0 on success, or the error of
+ * in4_addr_default_prefixlen() if addr has no default prefix length. */
+int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) {
+        unsigned char len;
+        int k;
+
+        assert(addr);
+        assert(mask);
+
+        k = in4_addr_default_prefixlen(addr, &len);
+        if (k < 0)
+                return k;
+
+        (void) in4_addr_prefixlen_to_netmask(mask, len);
+
+        return 0;
+}
+
+/* Clears the host part of *addr in place, keeping only the first prefixlen bits. Returns 0 on success,
+ * -EINVAL if prefixlen exceeds 32 for AF_INET, and -EAFNOSUPPORT for families other than AF_INET and
+ * AF_INET6. For AF_INET6 a prefixlen of 128 or more leaves the address as it is. */
+int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen) {
+        assert(addr);
+
+        if (family == AF_INET) {
+                struct in_addr mask;
+
+                /* in4_addr_prefixlen_to_netmask() asserts on overlong prefixes and never returns NULL,
+                 * hence the check has to happen here, before calling it */
+                if (prefixlen > 32)
+                        return -EINVAL;
+
+                in4_addr_prefixlen_to_netmask(&mask, prefixlen);
+
+                addr->in.s_addr &= mask.s_addr;
+                return 0;
+        }
+
+        if (family == AF_INET6) {
+                unsigned i;
+
+                /* Walk the address byte by byte, prefixlen holds the number of bits still to keep */
+                for (i = 0; i < 16; i++) {
+                        uint8_t mask;
+
+                        if (prefixlen >= 8) {
+                                mask = 0xFF;
+                                prefixlen -= 8;
+                        } else {
+                                mask = 0xFF << (8 - prefixlen);
+                                prefixlen = 0;
+                        }
+
+                        addr->in6.s6_addr[i] &= mask;
+                }
+
+                return 0;
+        }
+
+        return -EAFNOSUPPORT;
+}
+
+/* Checks whether address lies within the network prefix/prefixlen, by masking both with prefixlen and
+ * comparing the results. Returns the result of in_addr_equal() on the masked values, or the error of
+ * in_addr_mask(). */
+int in_addr_prefix_covers(int family,
+                          const union in_addr_union *prefix,
+                          unsigned char prefixlen,
+                          const union in_addr_union *address) {
+
+        union in_addr_union net, host;
+        int k;
+
+        assert(prefix);
+        assert(address);
+
+        /* Work on copies, the arguments are not to be modified */
+        net = *prefix;
+        host = *address;
+
+        k = in_addr_mask(family, &net, prefixlen);
+        if (k < 0)
+                return k;
+
+        k = in_addr_mask(family, &host, prefixlen);
+        if (k < 0)
+                return k;
+
+        return in_addr_equal(family, &net, &host);
+}
+
+/* Parses p with safe_atou8() as a prefix length for the given family and stores it in *ret. Returns 0
+ * on success, -EAFNOSUPPORT for families other than AF_INET and AF_INET6, -ERANGE if the value exceeds
+ * the number of bits of an address of that family, or the error of safe_atou8(). */
+int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret) {
+        uint8_t parsed;
+        int k;
+
+        if (family != AF_INET && family != AF_INET6)
+                return -EAFNOSUPPORT;
+
+        k = safe_atou8(p, &parsed);
+        if (k < 0)
+                return k;
+
+        if (parsed > FAMILY_ADDRESS_SIZE(family) * 8)
+                return -ERANGE;
+
+        *ret = parsed;
+        return 0;
+}
+
+/* Parses "ADDRESS" or "ADDRESS/PREFIXLEN" for the given family. Without an explicit prefix length the
+ * full address size in bits is used. Both ret_prefix and ret_prefixlen may be NULL. Returns 0 on
+ * success, a negative errno on failure; the return parameters are only written on success. */
+int in_addr_prefix_from_string(
+                const char *p,
+                int family,
+                union in_addr_union *ret_prefix,
+                unsigned char *ret_prefixlen) {
+
+        _cleanup_free_ char *str = NULL;
+        union in_addr_union parsed;
+        const char *slash, *address;
+        unsigned char len;
+        int k;
+
+        assert(p);
+
+        if (family != AF_INET && family != AF_INET6)
+                return -EAFNOSUPPORT;
+
+        address = p;
+
+        slash = strchr(p, '/');
+        if (slash) {
+                /* Split off the address part before the slash */
+                str = strndup(p, slash - p);
+                if (!str)
+                        return -ENOMEM;
+
+                address = str;
+        }
+
+        k = in_addr_from_string(family, address, &parsed);
+        if (k < 0)
+                return k;
+
+        if (!slash)
+                len = FAMILY_ADDRESS_SIZE(family) * 8;
+        else {
+                k = in_addr_parse_prefixlen(family, slash + 1, &len);
+                if (k < 0)
+                        return k;
+        }
+
+        if (ret_prefix)
+                *ret_prefix = parsed;
+        if (ret_prefixlen)
+                *ret_prefixlen = len;
+
+        return 0;
+}
+
+/* Like in_addr_prefix_from_string(), but detects the address family automatically, and lets mode decide
+ * what to do if no prefix length is specified. All three return parameters may be NULL and are only
+ * written on success. Returns 0 on success, a negative errno on failure. */
+int in_addr_prefix_from_string_auto_internal(
+ const char *p,
+ InAddrPrefixLenMode mode,
+ int *ret_family,
+ union in_addr_union *ret_prefix,
+ unsigned char *ret_prefixlen) {
+
+ _cleanup_free_ char *str = NULL;
+ union in_addr_union buffer;
+ const char *e, *l;
+ unsigned char k;
+ int family, r;
+
+ assert(p);
+
+ /* Split off the address part in front of the slash, if there is one */
+ e = strchr(p, '/');
+ if (e) {
+ str = strndup(p, e - p);
+ if (!str)
+ return -ENOMEM;
+
+ l = str;
+ } else
+ l = p;
+
+ r = in_addr_from_string_auto(l, &family, &buffer);
+ if (r < 0)
+ return r;
+
+ if (e) {
+ r = in_addr_parse_prefixlen(family, e+1, &k);
+ if (r < 0)
+ return r;
+ } else
+ /* No prefix length specified, pick one according to the mode */
+ switch (mode) {
+ case PREFIXLEN_FULL:
+ k = FAMILY_ADDRESS_SIZE(family) * 8;
+ break;
+ case PREFIXLEN_REFUSE:
+ return -ENOANO; /* To distinguish this error from others. */
+ case PREFIXLEN_LEGACY:
+ /* Classful default for IPv4, 0 for IPv6 */
+ if (family == AF_INET) {
+ r = in4_addr_default_prefixlen(&buffer.in, &k);
+ if (r < 0)
+ return r;
+ } else
+ k = 0;
+ break;
+ default:
+ assert_not_reached("Invalid prefixlen mode");
+ }
+
+ if (ret_family)
+ *ret_family = family;
+ if (ret_prefix)
+ *ret_prefix = buffer;
+ if (ret_prefixlen)
+ *ret_prefixlen = k;
+
+ return 0;
+
+}
+
+/* Hash function for struct in_addr_data: feeds the family, followed by as many address bytes as the
+ * family uses, into the siphash state. */
+static void in_addr_data_hash_func(const struct in_addr_data *a, struct siphash *state) {
+        size_t sz;
+
+        siphash24_compress(&a->family, sizeof(a->family), state);
+
+        sz = FAMILY_ADDRESS_SIZE(a->family);
+        siphash24_compress(&a->address, sz, state);
+}
+
+/* Ordering for struct in_addr_data: by family first, then by the address bytes the family uses. */
+static int in_addr_data_compare_func(const struct in_addr_data *x, const struct in_addr_data *y) {
+        int c = CMP(x->family, y->family);
+
+        return c != 0 ? c : memcmp(&x->address, &y->address, FAMILY_ADDRESS_SIZE(x->family));
+}
+
+/* Ties the two functions above together as in_addr_data_hash_ops, for containers keyed by struct in_addr_data. */
+DEFINE_HASH_OPS(in_addr_data_hash_ops, struct in_addr_data, in_addr_data_hash_func, in_addr_data_compare_func);
diff --git a/src/basic/in-addr-util.h b/src/basic/in-addr-util.h
new file mode 100644
index 0000000..3069790
--- /dev/null
+++ b/src/basic/in-addr-util.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <netinet/in.h>
+#include <stddef.h>
+#include <sys/socket.h>
+
+#include "hash-funcs.h"
+#include "macro.h"
+#include "util.h"
+
+/* Storage for either an IPv4 or an IPv6 address. The union does not record which member is valid, the
+ * address family has to be tracked alongside. */
+union in_addr_union {
+ struct in_addr in;
+ struct in6_addr in6;
+};
+
+/* An address together with its family (AF_INET or AF_INET6). */
+struct in_addr_data {
+ int family;
+ union in_addr_union address;
+};
+
+/* The in4_*() predicates are IPv4-only and return bool. Their in_addr_*() counterparts take the address
+ * family as first argument and return int: positive or 0 as result, -EAFNOSUPPORT for families other
+ * than AF_INET and AF_INET6. */
+bool in4_addr_is_null(const struct in_addr *a);
+int in_addr_is_null(int family, const union in_addr_union *u);
+
+int in_addr_is_multicast(int family, const union in_addr_union *u);
+
+bool in4_addr_is_link_local(const struct in_addr *a);
+int in_addr_is_link_local(int family, const union in_addr_union *u);
+
+bool in4_addr_is_localhost(const struct in_addr *a);
+int in_addr_is_localhost(int family, const union in_addr_union *u);
+
+/* Comparison and prefix arithmetic */
+int in_addr_equal(int family, const union in_addr_union *a, const union in_addr_union *b);
+int in_addr_prefix_intersect(int family, const union in_addr_union *a, unsigned aprefixlen, const union in_addr_union *b, unsigned bprefixlen);
+int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen);
+/* Conversion to and from strings; the *_to_string() calls return a newly allocated string in *ret */
+int in_addr_to_string(int family, const union in_addr_union *u, char **ret);
+int in_addr_ifindex_to_string(int family, const union in_addr_union *u, int ifindex, char **ret);
+int in_addr_from_string(int family, const char *s, union in_addr_union *ret);
+int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret);
+int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex);
+/* Netmask and prefix length helpers */
+unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr);
+struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen);
+int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen);
+int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask);
+int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen);
+int in_addr_prefix_covers(int family, const union in_addr_union *prefix, unsigned char prefixlen, const union in_addr_union *address);
+int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret);
+int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
+
+/* Selects what in_addr_prefix_from_string_auto_internal() does if the string carries no "/PREFIXLEN". */
+typedef enum InAddrPrefixLenMode {
+ PREFIXLEN_FULL, /* Default to prefixlen of address size, 32 for IPv4 or 128 for IPv6, if not specified. */
+ PREFIXLEN_REFUSE, /* Fail with -ENOANO if prefixlen is not specified. */
+ PREFIXLEN_LEGACY, /* Default to legacy default prefixlen calculation from address if not specified. */
+} InAddrPrefixLenMode;
+
+int in_addr_prefix_from_string_auto_internal(const char *p, InAddrPrefixLenMode mode, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
+/* Convenience wrapper: a missing prefix length defaults to the full address size (PREFIXLEN_FULL). */
+static inline int in_addr_prefix_from_string_auto(const char *p, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen) {
+ return in_addr_prefix_from_string_auto_internal(p, PREFIXLEN_FULL, ret_family, ret_prefix, ret_prefixlen);
+}
+
+/* Size in bytes of an address of the given family: 16 for AF_INET6, 4 for AF_INET. Any other family
+ * trips the assertion. */
+static inline size_t FAMILY_ADDRESS_SIZE(int family) {
+        assert(IN_SET(family, AF_INET, AF_INET6));
+
+        if (family == AF_INET6)
+                return 16;
+
+        return 4;
+}
+
+/* An all-zero union in_addr_union as compound literal.
+ * Workaround for clang, explicitly specify the maximum-size element here.
+ * See also oss-fuzz#11344. */
+#define IN_ADDR_NULL ((union in_addr_union) { .in6 = {} })
+
+/* Hash operations for struct in_addr_data keys, defined in in-addr-util.c */
+extern const struct hash_ops in_addr_data_hash_ops;
diff --git a/src/basic/io-util.c b/src/basic/io-util.c
new file mode 100644
index 0000000..575398f
--- /dev/null
+++ b/src/basic/io-util.c
@@ -0,0 +1,264 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <poll.h>
+#include <stdio.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "io-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+/* Reads from fd and throws the data away, until poll() no longer reports POLLIN, read() fails with
+ * EAGAIN or end-of-file is hit. Returns the number of bytes discarded, or a negative errno.
+ *
+ * Note that some file descriptors (notable IP sockets) will trigger POLLIN even when no data can be read
+ * (due to IP packet checksum mismatches), hence this function is only safe to be non-blocking if the fd
+ * used was set to non-blocking too. */
+int flush_fd(int fd) {
+        struct pollfd pfd = {
+                .fd = fd,
+                .events = POLLIN,
+        };
+        int discarded = 0;
+
+        for (;;) {
+                char scratch[LINE_MAX];
+                ssize_t got;
+                int ready;
+
+                /* Zero timeout: only check whether something is pending right now */
+                ready = poll(&pfd, 1, 0);
+                if (ready < 0) {
+                        if (errno == EINTR)
+                                continue;
+
+                        return -errno;
+                }
+                if (ready == 0)
+                        return discarded;
+
+                got = read(fd, scratch, sizeof(scratch));
+                if (got < 0) {
+                        if (errno == EINTR)
+                                continue;
+
+                        if (errno == EAGAIN)
+                                return discarded;
+
+                        return -errno;
+                }
+                if (got == 0)
+                        return discarded;
+
+                discarded += (int) got;
+        }
+}
+
+/* Reads up to nbytes bytes from fd into buf, calling read() repeatedly until the buffer is full,
+ * end-of-file is hit or an error occurs. If do_poll is true, EAGAIN is handled by waiting for POLLIN
+ * and retrying. Returns the number of bytes read, which may be short, or a negative errno if the first
+ * failure happened before anything was read. Returns -EINVAL if nbytes exceeds SSIZE_MAX. */
+ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll) {
+ uint8_t *p = buf;
+ ssize_t n = 0;
+
+ assert(fd >= 0);
+ assert(buf);
+
+ /* If called with nbytes == 0, let's call read() at least
+ * once, to validate the operation */
+
+ if (nbytes > (size_t) SSIZE_MAX)
+ return -EINVAL;
+
+ do {
+ ssize_t k;
+
+ k = read(fd, p, nbytes);
+ if (k < 0) {
+ /* Note that "continue" in a do/while loop jumps to the loop condition, hence with
+ * nbytes == 0 these retries end the loop instead */
+ if (errno == EINTR)
+ continue;
+
+ if (errno == EAGAIN && do_poll) {
+
+ /* We knowingly ignore any return value here,
+ * and expect that any error/EOF is reported
+ * via read() */
+
+ (void) fd_wait_for_event(fd, POLLIN, USEC_INFINITY);
+ continue;
+ }
+
+ /* Data already read takes precedence over the error */
+ return n > 0 ? n : -errno;
+ }
+
+ /* End of file */
+ if (k == 0)
+ return n;
+
+ assert((size_t) k <= nbytes);
+
+ p += k;
+ nbytes -= k;
+ n += k;
+ } while (nbytes > 0);
+
+ return n;
+}
+
+/* Like loop_read(), but a short read is an error: returns 0 only if exactly nbytes bytes were read,
+ * -EIO if fewer were read, and the error of loop_read() otherwise. */
+int loop_read_exact(int fd, void *buf, size_t nbytes, bool do_poll) {
+        ssize_t got;
+
+        got = loop_read(fd, buf, nbytes, do_poll);
+        if (got < 0)
+                return (int) got;
+
+        return (size_t) got == nbytes ? 0 : -EIO;
+}
+
+/* Writes all nbytes bytes of buf to fd, calling write() repeatedly until everything is written. If
+ * do_poll is true, EAGAIN is handled by waiting for POLLOUT and retrying. Returns 0 on success, or a
+ * negative errno; on failure part of the data may have been written already. Returns -EINVAL if nbytes
+ * exceeds SSIZE_MAX. */
+int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll) {
+ const uint8_t *p = buf;
+
+ assert(fd >= 0);
+ assert(buf);
+
+ if (_unlikely_(nbytes > (size_t) SSIZE_MAX))
+ return -EINVAL;
+
+ /* do/while, so that write() is called once even if nbytes == 0 */
+ do {
+ ssize_t k;
+
+ k = write(fd, p, nbytes);
+ if (k < 0) {
+ /* Note that "continue" in a do/while loop jumps to the loop condition, hence with
+ * nbytes == 0 these retries end the loop instead */
+ if (errno == EINTR)
+ continue;
+
+ if (errno == EAGAIN && do_poll) {
+ /* We knowingly ignore any return value here,
+ * and expect that any error/EOF is reported
+ * via write() */
+
+ (void) fd_wait_for_event(fd, POLLOUT, USEC_INFINITY);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ if (_unlikely_(nbytes > 0 && k == 0)) /* Can't really happen */
+ return -EIO;
+
+ assert((size_t) k <= nbytes);
+
+ p += k;
+ nbytes -= k;
+ } while (nbytes > 0);
+
+ return 0;
+}
+
+/* Checks, without blocking, whether poll() reports POLLHUP on fd. Returns a positive value (the
+ * POLLHUP bit) if so, 0 if not or if no event is pending at all, and a negative errno if poll()
+ * fails. */
+int pipe_eof(int fd) {
+        struct pollfd pfd = {
+                .fd = fd,
+                .events = POLLIN|POLLHUP,
+        };
+        int ready;
+
+        ready = poll(&pfd, 1, 0);
+        if (ready < 0)
+                return -errno;
+
+        return ready == 0 ? 0 : (pfd.revents & POLLHUP);
+}
+
+/* Waits with ppoll() until one of the poll events in "event" is seen on fd, or until the timeout t
+ * elapses; USEC_INFINITY waits forever. Returns the revents mask reported by ppoll(), 0 on timeout, or
+ * a negative errno. */
+int fd_wait_for_event(int fd, int event, usec_t t) {
+        struct pollfd pfd = {
+                .fd = fd,
+                .events = event,
+        };
+        struct timespec ts;
+        const struct timespec *timeout = NULL; /* NULL means no timeout */
+        int ready;
+
+        if (t != USEC_INFINITY)
+                timeout = timespec_store(&ts, t);
+
+        ready = ppoll(&pfd, 1, timeout, NULL);
+        if (ready < 0)
+                return -errno;
+        if (ready == 0)
+                return 0;
+
+        return pfd.revents;
+}
+
+/* Counts the NUL bytes at the beginning of the sz bytes at p. */
+static size_t nul_length(const uint8_t *p, size_t sz) {
+        size_t run;
+
+        for (run = 0; run < sz; run++)
+                if (p[run] != 0)
+                        break;
+
+        return run;
+}
+
+/* Writes the sz bytes at p to fd, but seeks forward with lseek() instead of writing for runs of NUL
+ * bytes that are longer than run_length, or that sit at the very beginning or end of the buffer. On
+ * success returns the number of bytes of the buffer processed, i.e. sz. Returns a negative errno if
+ * write() or lseek() fails, and -EIO on a short write. */
+ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length) {
+ const uint8_t *q, *w, *e;
+ ssize_t l;
+
+ /* q is the scan position, w the start of the data not written yet, e the end of the buffer */
+ q = w = p;
+ e = q + sz;
+ while (q < e) {
+ size_t n;
+
+ n = nul_length(q, e - q);
+
+ /* If there are more than the specified run length of
+ * NUL bytes, or if this is the beginning or the end
+ * of the buffer, then seek instead of write */
+ if ((n > run_length) ||
+ (n > 0 && q == p) ||
+ (n > 0 && q + n >= e)) {
+ /* Flush the pending data in front of the hole first */
+ if (q > w) {
+ l = write(fd, w, q - w);
+ if (l < 0)
+ return -errno;
+ if (l != q -w)
+ return -EIO;
+ }
+
+ if (lseek(fd, n, SEEK_CUR) == (off_t) -1)
+ return -errno;
+
+ q += n;
+ w = q;
+ } else if (n > 0)
+ /* A short run of NUL bytes, it stays part of the pending data */
+ q += n;
+ else
+ q++;
+ }
+
+ /* Write out whatever is still pending */
+ if (q > w) {
+ l = write(fd, w, q - w);
+ if (l < 0)
+ return -errno;
+ if (l != q - w)
+ return -EIO;
+ }
+
+ return q - (const uint8_t*) p;
+}
+
+/* Concatenates field and value with strappend() and, on success, stores the resulting string as the
+ * next element of iovec, incrementing *n_iovec. Returns the new string, or NULL if strappend() failed,
+ * in which case iovec and *n_iovec are left untouched. */
+char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value) {
+        char *joined;
+
+        joined = strappend(field, value);
+        if (!joined)
+                return NULL;
+
+        iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(joined);
+
+        return joined;
+}
diff --git a/src/basic/io-util.h b/src/basic/io-util.h
new file mode 100644
index 0000000..792a64a
--- /dev/null
+++ b/src/basic/io-util.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include "macro.h"
+#include "time-util.h"
+
+/* Discards everything pending on fd; returns the number of bytes thrown away, or a negative errno. */
+int flush_fd(int fd);
+
+/* read()/write() in a loop until nbytes are processed. With do_poll, EAGAIN is handled by waiting for
+ * the fd to become ready. loop_read() may return a short count, loop_read_exact() turns that into -EIO. */
+ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll);
+int loop_read_exact(int fd, void *buf, size_t nbytes, bool do_poll);
+int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll);
+
+/* Non-blocking check for POLLHUP on fd. */
+int pipe_eof(int fd);
+
+/* Returns the revents mask, 0 on timeout, negative errno on failure; USEC_INFINITY disables the timeout. */
+int fd_wait_for_event(int fd, int event, usec_t timeout);
+
+/* Like write(), but seeks over long runs of NUL bytes instead of writing them. */
+ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length);
+
+/* Sums up the iov_len fields of the n elements of the array i. */
+static inline size_t IOVEC_TOTAL_SIZE(const struct iovec *i, size_t n) {
+        size_t total = 0;
+
+        while (n > 0)
+                total += i[--n].iov_len;
+
+        return total;
+}
+
+/* Consumes k bytes from the front of the n-element array i: each element in turn has its iov_base
+ * advanced and its iov_len reduced, until k bytes are accounted for. Returns the number of bytes that
+ * could not be consumed because the array was exhausted, i.e. 0 if it held at least k bytes. */
+static inline size_t IOVEC_INCREMENT(struct iovec *i, size_t n, size_t k) {
+        size_t idx;
+
+        for (idx = 0; idx < n && k > 0; idx++) {
+                /* Take as much as this element holds, but no more than what is still wanted */
+                size_t step = i[idx].iov_len < k ? i[idx].iov_len : k;
+
+                i[idx].iov_base = (uint8_t*) i[idx].iov_base + step;
+                i[idx].iov_len -= step;
+                k -= step;
+        }
+
+        return k;
+}
+
+/* True if l is usable as a file size, i.e. fits into a signed 64bit value. */
+static inline bool FILE_SIZE_VALID(uint64_t l) {
+        /* ftruncate() and friends take an unsigned file size, but actually cannot deal with file sizes larger than
+         * 2^63 since the kernel internally handles it as signed value. This call allows checking for this early. */
+
+        return l <= (uint64_t) INT64_MAX;
+}
+
+/* Like FILE_SIZE_VALID(), but accepts one extra value: (uint64_t) -1 as indication for infinity. */
+static inline bool FILE_SIZE_VALID_OR_INFINITY(uint64_t l) {
+        return l == (uint64_t) -1 || FILE_SIZE_VALID(l);
+}
+
+/* The _INIT variants expand to a brace initializer for a struct iovec, the _MAKE variants to a
+ * compound literal. The _STRING variants take the length from strlen(), i.e. the trailing NUL is not
+ * included, and the argument is evaluated twice. */
+#define IOVEC_INIT(base, len) { .iov_base = (base), .iov_len = (len) }
+#define IOVEC_MAKE(base, len) (struct iovec) IOVEC_INIT(base, len)
+#define IOVEC_INIT_STRING(string) IOVEC_INIT((char*) string, strlen(string))
+#define IOVEC_MAKE_STRING(string) (struct iovec) IOVEC_INIT_STRING(string)
+
+/* Appends the string field + value as new element to iovec; returns the new string, or NULL on failure. */
+char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value);
diff --git a/src/basic/ioprio.h b/src/basic/ioprio.h
new file mode 100644
index 0000000..3fb168d
--- /dev/null
+++ b/src/basic/ioprio.h
@@ -0,0 +1,56 @@
+#ifndef IOPRIO_H
+#define IOPRIO_H
+
+/* This is a minimal version of Linux's linux/ioprio.h header file, which
+ * is licensed under GPL2 */
+
+#include <sys/syscall.h>
+#include <unistd.h>
+
+/*
+ * Gives us 8 prio classes with 13-bits of data for each class
+ */
+#define IOPRIO_BITS 16
+#define IOPRIO_N_CLASSES 8
+#define IOPRIO_CLASS_SHIFT 13
+#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1)
+
+/* Split a combined priority value into its class (bits above IOPRIO_CLASS_SHIFT) and data (low bits) */
+#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT)
+#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK)
+/* Combine class and data into one priority value. Both arguments are parenthesized so that arbitrary
+ * expressions may be passed for either of them. */
+#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | (data))
+
+/* True if the class encoded in the priority value is anything but IOPRIO_CLASS_NONE */
+#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE)
+
+/*
+ * These are the io priority groups as implemented by CFQ. RT is the realtime
+ * class, it always gets premium service. BE is the best-effort scheduling
+ * class, the default for any process. IDLE is the idle scheduling class, it
+ * is only served when no one else is using the disk.
+ */
+enum {
+ IOPRIO_CLASS_NONE, /* 0: no class set; ioprio_valid() reports such a value as not valid */
+ IOPRIO_CLASS_RT, /* 1: realtime */
+ IOPRIO_CLASS_BE, /* 2: best-effort */
+ IOPRIO_CLASS_IDLE, /* 3: idle */
+};
+
+/*
+ * 8 best effort priority levels are supported
+ */
+#define IOPRIO_BE_NR (8)
+
+/* Selector values, numbered from 1. NOTE(review): presumably these are the values for the 'which' argument
+ * of ioprio_set()/ioprio_get() below, choosing how 'who' is interpreted (process, process group, user) —
+ * confirm against ioprio_set(2). */
+enum {
+ IOPRIO_WHO_PROCESS = 1,
+ IOPRIO_WHO_PGRP,
+ IOPRIO_WHO_USER,
+};
+
+/* Thin wrapper that issues the raw ioprio_set system call: the three arguments are passed through
+ * unmodified and the result of syscall() is returned, converted to int. */
+static inline int ioprio_set(int which, int who, int ioprio) {
+        long rc;
+
+        rc = syscall(__NR_ioprio_set, which, who, ioprio);
+        return (int) rc;
+}
+
+/* Thin wrapper that issues the raw ioprio_get system call: both arguments are passed through unmodified
+ * and the result of syscall() is returned, converted to int. */
+static inline int ioprio_get(int which, int who) {
+        long rc;
+
+        rc = syscall(__NR_ioprio_get, which, who);
+        return (int) rc;
+}
+
+#endif
diff --git a/src/basic/khash.c b/src/basic/khash.c
new file mode 100644
index 0000000..847f257
--- /dev/null
+++ b/src/basic/khash.c
@@ -0,0 +1,322 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <linux/if_alg.h>
+#include <stdbool.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hexdecoct.h"
+#include "khash.h"
+#include "macro.h"
+#include "missing.h"
+#include "string-util.h"
+#include "util.h"
+
+/* On current kernels the maximum digest (according to "grep digestsize /proc/crypto | sort -u") is actually 32, but
+ * let's add some extra room, the few wasted bytes don't really matter... */
+#define LONGEST_DIGEST 128
+
+/* State of one kernel-backed (AF_ALG) hash operation. */
+struct khash {
+ int fd; /* operation socket, obtained via accept4() on a bound AF_ALG socket */
+ char *algorithm; /* heap-allocated copy of the algorithm name, freed in khash_unref() */
+ uint8_t digest[LONGEST_DIGEST+1]; /* most recently retrieved digest, only meaningful while digest_valid is set */
+ size_t digest_size; /* digest length in bytes, as returned by recv() when the object was created */
+ bool digest_valid; /* cleared when data is added or the hash is reset, set once the digest was read back */
+};
+
+/* Probes whether the kernel's AF_ALG hash interface is usable, using "sha256" as test algorithm. Returns
+ * > 0 if it is, 0 if it is not, and a negative errno-style value if the probe failed for an unexpected
+ * reason. Only the definitive answers are cached in a static variable; errors are not. */
+int khash_supported(void) {
+        static const union {
+                struct sockaddr sa;
+                struct sockaddr_alg alg;
+        } sa = {
+                .alg.salg_family = AF_ALG,
+                .alg.salg_type = "hash",
+                .alg.salg_name = "sha256", /* a very common algorithm */
+        };
+        static int cached = -1;
+
+        _cleanup_close_ int bound_fd = -1, op_fd = -1;
+        uint8_t scratch[LONGEST_DIGEST+1];
+
+        if (cached >= 0)
+                return cached;
+
+        bound_fd = socket(AF_ALG, SOCK_SEQPACKET|SOCK_CLOEXEC, 0);
+        if (bound_fd < 0) {
+                /* The kernel returns EAFNOSUPPORT if AF_ALG is not supported at all */
+                if (!IN_SET(errno, EAFNOSUPPORT, EOPNOTSUPP))
+                        return -errno;
+
+                goto unsupported;
+        }
+
+        if (bind(bound_fd, &sa.sa, sizeof(sa)) < 0) {
+                /* The kernel returns ENOENT if the selected algorithm is not supported at all. We use a check
+                 * for SHA256 as a proxy for whether the whole API is supported at all. After all it's one of
+                 * the most common hash functions, and if it isn't supported, that's ample indication that
+                 * something is really off. */
+                if (!IN_SET(errno, ENOENT, EOPNOTSUPP))
+                        return -errno;
+
+                goto unsupported;
+        }
+
+        op_fd = accept4(bound_fd, NULL, 0, SOCK_CLOEXEC);
+        if (op_fd < 0) {
+                if (errno != EOPNOTSUPP)
+                        return -errno;
+
+                goto unsupported;
+        }
+
+        /* On some kernels we get ENOKEY for non-keyed hash functions (such as sha256), let's refuse using the
+         * API in those cases, since the kernel is broken. https://github.com/systemd/systemd/issues/8278
+         *
+         * Any other recv() failure is deliberately not treated as "unsupported" here. */
+        if (recv(op_fd, scratch, sizeof(scratch), 0) < 0 &&
+            IN_SET(errno, ENOKEY, EOPNOTSUPP))
+                goto unsupported;
+
+        cached = true;
+        return cached;
+
+unsupported:
+        cached = false;
+        return cached;
+}
+
+/* Allocates a new hash object for the given algorithm and, if key is non-NULL, configures that key on the
+ * AF_ALG socket. On success stores the new object in *ret and returns 0. Returns -EINVAL for an empty
+ * algorithm name, -EOPNOTSUPP if the name is too long, the API is unavailable, the kernel does not know
+ * the algorithm or the digest is larger than expected, -ENOMEM on allocation failure, and -errno for
+ * failing socket calls. */
+int khash_new_with_key(khash **ret, const char *algorithm, const void *key, size_t key_size) {
+        union {
+                struct sockaddr sa;
+                struct sockaddr_alg alg;
+        } addr = {
+                .alg.salg_family = AF_ALG,
+                .alg.salg_type = "hash",
+        };
+
+        _cleanup_(khash_unrefp) khash *hash = NULL;
+        _cleanup_close_ int bound_fd = -1;
+        ssize_t len;
+        int r;
+
+        assert(ret);
+        assert(key || key_size == 0);
+
+        /* Filter out an empty algorithm early, as we do not support an algorithm by that name. */
+        if (isempty(algorithm))
+                return -EINVAL;
+
+        /* Overly long hash algorithm names we definitely do not support */
+        if (strlen(algorithm) >= sizeof(addr.alg.salg_name))
+                return -EOPNOTSUPP;
+
+        r = khash_supported();
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return -EOPNOTSUPP;
+
+        bound_fd = socket(AF_ALG, SOCK_SEQPACKET|SOCK_CLOEXEC, 0);
+        if (bound_fd < 0)
+                return -errno;
+
+        /* The length was validated above, hence the plain copy is safe */
+        strcpy((char*) addr.alg.salg_name, algorithm);
+        if (bind(bound_fd, &addr.sa, sizeof(addr)) < 0)
+                return errno == ENOENT ? -EOPNOTSUPP : -errno;
+
+        if (key && setsockopt(bound_fd, SOL_ALG, ALG_SET_KEY, key, key_size) < 0)
+                return -errno;
+
+        hash = new0(khash, 1);
+        if (!hash)
+                return -ENOMEM;
+
+        hash->fd = accept4(bound_fd, NULL, 0, SOCK_CLOEXEC);
+        if (hash->fd < 0)
+                return -errno;
+
+        hash->algorithm = strdup(algorithm);
+        if (!hash->algorithm)
+                return -ENOMEM;
+
+        /* Temporary fix for rc kernel bug: https://bugzilla.redhat.com/show_bug.cgi?id=1395896 */
+        (void) send(hash->fd, NULL, 0, 0);
+
+        /* Figure out the digest size */
+        len = recv(hash->fd, hash->digest, sizeof(hash->digest), 0);
+        if (len < 0)
+                return -errno;
+        if (len >= LONGEST_DIGEST) /* longer than what we expected? If so, we don't support this */
+                return -EOPNOTSUPP;
+
+        hash->digest_size = (size_t) len;
+        hash->digest_valid = true;
+
+        /* Temporary fix for rc kernel bug: https://bugzilla.redhat.com/show_bug.cgi?id=1395896 */
+        (void) send(hash->fd, NULL, 0, 0);
+
+        *ret = TAKE_PTR(hash);
+
+        return 0;
+}
+
+/* Creates a hash object for an unkeyed algorithm: calls khash_new_with_key() with a NULL key of size 0 and
+ * returns its result. */
+int khash_new(khash **ret, const char *algorithm) {
+ return khash_new_with_key(ret, algorithm, NULL, 0);
+}
+
+/* Releases a hash object: closes its socket and frees the algorithm name and the object itself. NULL is
+ * accepted and ignored. Always returns NULL. */
+khash* khash_unref(khash *h) {
+        if (h) {
+                safe_close(h->fd);
+                free(h->algorithm);
+                free(h);
+        }
+
+        return NULL;
+}
+
+/* Creates a copy of the hash object h: the structure contents are duplicated, the algorithm name is
+ * copied, and a new socket is obtained by calling accept4() on h's socket. On success stores the copy in
+ * *ret and returns 0; returns -ENOMEM or -errno on failure. */
+int khash_dup(khash *h, khash **ret) {
+        _cleanup_(khash_unrefp) khash *clone = NULL;
+
+        assert(h);
+        assert(ret);
+
+        clone = newdup(khash, h, 1);
+        if (!clone)
+                return -ENOMEM;
+
+        /* The byte-wise copy still references h's socket and string. Replace both before any error path
+         * may run the cleanup handler on the copy. */
+        clone->algorithm = strdup(h->algorithm);
+        clone->fd = -1;
+        if (!clone->algorithm)
+                return -ENOMEM;
+
+        clone->fd = accept4(h->fd, NULL, 0, SOCK_CLOEXEC);
+        if (clone->fd < 0)
+                return -errno;
+
+        *ret = TAKE_PTR(clone);
+
+        return 0;
+}
+
+/* Returns the algorithm name stored in h. The string remains owned by h. */
+const char *khash_get_algorithm(khash *h) {
+ assert(h);
+
+ return h->algorithm;
+}
+
+/* Returns the digest size in bytes that was recorded in h. */
+size_t khash_get_size(khash *h) {
+ assert(h);
+
+ return h->digest_size;
+}
+
+/* Sends an empty message (without MSG_MORE) on the hash socket and marks the cached digest as invalid.
+ * Returns 0 on success, -errno if send() fails. */
+int khash_reset(khash *h) {
+        assert(h);
+
+        if (send(h->fd, NULL, 0, 0) < 0)
+                return -errno;
+
+        h->digest_valid = false;
+        return 0;
+}
+
+/* Feeds size bytes from buffer into the hash (sent with MSG_MORE) and marks the cached digest as invalid.
+ * A size of zero is a no-op. Returns 0 on success, -errno if send() fails. */
+int khash_put(khash *h, const void *buffer, size_t size) {
+        assert(h);
+        assert(buffer || size == 0);
+
+        if (size == 0)
+                return 0;
+
+        if (send(h->fd, buffer, size, MSG_MORE) < 0)
+                return -errno;
+
+        h->digest_valid = false;
+        return 0;
+}
+
+/* Feeds the data referenced by the n entries of the iovec array into the hash with a single sendmsg()
+ * call (sent with MSG_MORE) and marks the cached digest as invalid. An n of zero is a no-op. Returns 0 on
+ * success, -errno if sendmsg() fails. */
+int khash_put_iovec(khash *h, const struct iovec *iovec, size_t n) {
+        /* Designated initializers: only the iovec fields are set, everything else (in particular
+         * msg_name/msg_namelen) is zero-initialized. */
+        struct msghdr mh = {
+                .msg_iov = (struct iovec*) iovec, /* sendmsg() only reads through this pointer */
+                .msg_iovlen = n,
+        };
+        ssize_t k;
+
+        assert(h);
+        assert(iovec || n == 0);
+
+        if (n <= 0)
+                return 0;
+
+        k = sendmsg(h->fd, &mh, MSG_MORE);
+        if (k < 0)
+                return -errno;
+
+        h->digest_valid = false;
+
+        return 0;
+}
+
+/* Makes sure h->digest holds the current digest: if it is not marked valid, reads it back from the kernel
+ * with recv(). Returns 0 on success, -errno if recv() fails, and -EIO if the number of bytes received
+ * differs from the digest size recorded in h. */
+static int retrieve_digest(khash *h) {
+        ssize_t n;
+
+        assert(h);
+
+        if (h->digest_valid)
+                return 0;
+
+        n = recv(h->fd, h->digest, h->digest_size, 0);
+        if (n < 0)
+                return -errno; /* recv() itself only returns -1, the actual error is in errno */
+        if ((size_t) n != h->digest_size) /* digest size changed? */
+                return -EIO;
+
+        h->digest_valid = true;
+
+        return 0;
+}
+
+/* Stores a pointer to the binary digest in *ret. The pointer refers to the buffer inside h, hence it is
+ * not to be freed by the caller. Returns 0 on success, or the negative error of retrieve_digest(). */
+int khash_digest_data(khash *h, const void **ret) {
+        int r;
+
+        assert(h);
+        assert(ret);
+
+        r = retrieve_digest(h);
+        if (r >= 0) {
+                *ret = h->digest;
+                r = 0;
+        }
+
+        return r;
+}
+
+/* Stores the digest, converted with hexmem(), as newly allocated string in *ret. Returns 0 on success,
+ * the negative error of retrieve_digest(), or -ENOMEM if the conversion fails. */
+int khash_digest_string(khash *h, char **ret) {
+        char *hex;
+        int r;
+
+        assert(h);
+        assert(ret);
+
+        r = retrieve_digest(h);
+        if (r < 0)
+                return r;
+
+        hex = hexmem(h->digest, h->digest_size);
+        if (!hex)
+                return -ENOMEM;
+
+        *ret = hex;
+        return 0;
+}
diff --git a/src/basic/khash.h b/src/basic/khash.h
new file mode 100644
index 0000000..a3013b9
--- /dev/null
+++ b/src/basic/khash.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include "macro.h"
+
+typedef struct khash khash;
+
+int khash_supported(void);
+
+/* For plain hash functions. Hash functions commonly supported on today's kernels are: crc32c, crct10dif, crc32,
+ * sha224, sha256, sha512, sha384, sha1, md5, md4, sha3-224, sha3-256, sha3-384, sha3-512, and more. */
+int khash_new(khash **ret, const char *algorithm);
+
+/* For keyed hash functions. Hash functions commonly supported on today's kernels are: hmac(sha256), cmac(aes),
+ * cmac(des3_ede), hmac(sha3-512), hmac(sha3-384), hmac(sha3-256), hmac(sha3-224), hmac(rmd160), hmac(rmd128),
+ * hmac(sha224), hmac(sha512), hmac(sha384), hmac(sha1), hmac(md5), and more. */
+int khash_new_with_key(khash **ret, const char *algorithm, const void *key, size_t key_size);
+
+int khash_dup(khash *h, khash **ret);
+khash* khash_unref(khash *h);
+
+const char *khash_get_algorithm(khash *h);
+size_t khash_get_size(khash *h);
+
+int khash_reset(khash *h);
+
+int khash_put(khash *h, const void *buffer, size_t size);
+int khash_put_iovec(khash *h, const struct iovec *iovec, size_t n);
+
+int khash_digest_data(khash *h, const void **ret);
+int khash_digest_string(khash *h, char **ret);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(khash*, khash_unref);
diff --git a/src/basic/label.c b/src/basic/label.c
new file mode 100644
index 0000000..12a7fb0
--- /dev/null
+++ b/src/basic/label.c
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "btrfs-util.h"
+#include "label.h"
+#include "macro.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+
+/* Runs both mac_selinux_fix() and mac_smack_fix() on path, passing flags to each. Both are always
+ * attempted; if the SELinux call failed its error is returned, otherwise a SMACK failure is returned,
+ * otherwise 0. */
+int label_fix(const char *path, LabelFixFlags flags) {
+        int selinux_r, smack_r;
+
+        selinux_r = mac_selinux_fix(path, flags);
+        smack_r = mac_smack_fix(path, flags);
+
+        if (selinux_r < 0)
+                return selinux_r;
+
+        return smack_r < 0 ? smack_r : 0;
+}
+
+/* Creates the symlink new_path pointing to old_path, bracketed by mac_selinux_create_file_prepare() and
+ * mac_selinux_create_file_clear(), and then runs mac_smack_fix() on the new link. Returns the result of
+ * mac_smack_fix() on success, a negative error otherwise. */
+int symlink_label(const char *old_path, const char *new_path) {
+        int r;
+
+        assert(old_path);
+        assert(new_path);
+
+        r = mac_selinux_create_file_prepare(new_path, S_IFLNK);
+        if (r < 0)
+                return r;
+
+        /* Pick up errno right away, before the clear call below gets a chance to change it */
+        r = symlink(old_path, new_path) < 0 ? -errno : 0;
+
+        mac_selinux_create_file_clear();
+
+        if (r < 0)
+                return r;
+
+        return mac_smack_fix(new_path, 0);
+}
+
+/* Calls btrfs_subvol_make() on path, bracketed by mac_selinux_create_file_prepare() and
+ * mac_selinux_create_file_clear(), and then runs mac_smack_fix() on the path. Returns the result of
+ * mac_smack_fix() on success, a negative error otherwise. */
+int btrfs_subvol_make_label(const char *path) {
+        int r;
+
+        assert(path);
+
+        r = mac_selinux_create_file_prepare(path, S_IFDIR);
+        if (r < 0)
+                return r;
+
+        r = btrfs_subvol_make(path);
+
+        /* The clear call is needed regardless of whether creating the subvolume worked */
+        mac_selinux_create_file_clear();
+
+        return r < 0 ? r : mac_smack_fix(path, 0);
+}
diff --git a/src/basic/label.h b/src/basic/label.h
new file mode 100644
index 0000000..594fd65
--- /dev/null
+++ b/src/basic/label.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+/* Bit flags accepted by label_fix(). NOTE(review): judging by the names these presumably make the fix-up
+ * treat ENOENT resp. EROFS failures as success — confirm in mac_selinux_fix()/mac_smack_fix(). */
+typedef enum LabelFixFlags {
+ LABEL_IGNORE_ENOENT = 1 << 0,
+ LABEL_IGNORE_EROFS = 1 << 1,
+} LabelFixFlags;
+
+int label_fix(const char *path, LabelFixFlags flags);
+
+int mkdir_label(const char *path, mode_t mode);
+int mkdirat_label(int dirfd, const char *path, mode_t mode);
+int symlink_label(const char *old_path, const char *new_path);
+
+int btrfs_subvol_make_label(const char *path);
diff --git a/src/basic/list.h b/src/basic/list.h
new file mode 100644
index 0000000..f7f9700
--- /dev/null
+++ b/src/basic/list.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "macro.h"
+
+/* The head of the linked list. Use this in the structure that shall
+ * contain the head of the linked list. Expands to a plain pointer to the
+ * first item. */
+#define LIST_HEAD(t,name) \
+ t *name
+
+/* The pointers in the linked list's items. Use this in the item structure.
+ * Declares the two members <name>_next and <name>_prev. */
+#define LIST_FIELDS(t,name) \
+ t *name##_next, *name##_prev
+
+/* Initialize the list's head, i.e. set it to NULL (the empty list) */
+#define LIST_HEAD_INIT(head) \
+ do { \
+ (head) = NULL; \
+ } while (false)
+
+/* Initialize a list item: both its next and prev pointers are set to NULL.
+ * The item must not be NULL. */
+#define LIST_INIT(name,item) \
+ do { \
+ typeof(*(item)) *_item = (item); \
+ assert(_item); \
+ _item->name##_prev = _item->name##_next = NULL; \
+ } while (false)
+
+/* Prepend an item to the list: the item becomes the new head, and the
+ * previous head (if any) becomes its successor. The item must not be NULL. */
+#define LIST_PREPEND(name,head,item) \
+ do { \
+ typeof(*(head)) **_head = &(head), *_item = (item); \
+ assert(_item); \
+ if ((_item->name##_next = *_head)) \
+ _item->name##_next->name##_prev = _item; \
+ _item->name##_prev = NULL; \
+ *_head = _item; \
+ } while (false)
+
+/* Append an item to the list: looks up the current tail by walking the
+ * list from the head, then inserts the item after it. */
+#define LIST_APPEND(name,head,item) \
+ do { \
+ typeof(*(head)) **_hhead = &(head), *_tail; \
+ LIST_FIND_TAIL(name, *_hhead, _tail); \
+ LIST_INSERT_AFTER(name, *_hhead, _tail, item); \
+ } while (false)
+
+/* Remove an item from the list: its neighbours are linked to each other,
+ * and the head is updated if the item was the first one (an item without
+ * predecessor is asserted to be the head). Afterwards the item's own next
+ * and prev pointers are reset to NULL. The item must not be NULL. */
+#define LIST_REMOVE(name,head,item) \
+ do { \
+ typeof(*(head)) **_head = &(head), *_item = (item); \
+ assert(_item); \
+ if (_item->name##_next) \
+ _item->name##_next->name##_prev = _item->name##_prev; \
+ if (_item->name##_prev) \
+ _item->name##_prev->name##_next = _item->name##_next; \
+ else { \
+ assert(*_head == _item); \
+ *_head = _item->name##_next; \
+ } \
+ _item->name##_next = _item->name##_prev = NULL; \
+ } while (false)
+
+/* Find the head of the list the item is part of, by following the prev
+ * pointers. The result is stored in 'head'; it is NULL if the item is NULL. */
+#define LIST_FIND_HEAD(name,item,head) \
+ do { \
+ typeof(*(item)) *_item = (item); \
+ if (!_item) \
+ (head) = NULL; \
+ else { \
+ while (_item->name##_prev) \
+ _item = _item->name##_prev; \
+ (head) = _item; \
+ } \
+ } while (false)
+
+/* Find the tail of the list the item is part of, by following the next
+ * pointers. The result is stored in 'tail'; it is NULL if the item is NULL. */
+#define LIST_FIND_TAIL(name,item,tail) \
+ do { \
+ typeof(*(item)) *_item = (item); \
+ if (!_item) \
+ (tail) = NULL; \
+ else { \
+ while (_item->name##_next) \
+ _item = _item->name##_next; \
+ (tail) = _item; \
+ } \
+ } while (false)
+
+/* Insert an item after another one (a = where, b = what). If a is NULL the
+ * item is inserted at the beginning of the list, i.e. becomes the new
+ * head. b must not be NULL. */
+#define LIST_INSERT_AFTER(name,head,a,b) \
+ do { \
+ typeof(*(head)) **_head = &(head), *_a = (a), *_b = (b); \
+ assert(_b); \
+ if (!_a) { \
+ if ((_b->name##_next = *_head)) \
+ _b->name##_next->name##_prev = _b; \
+ _b->name##_prev = NULL; \
+ *_head = _b; \
+ } else { \
+ if ((_b->name##_next = _a->name##_next)) \
+ _b->name##_next->name##_prev = _b; \
+ _b->name##_prev = _a; \
+ _a->name##_next = _b; \
+ } \
+ } while (false)
+
+/* Insert an item before another one (a = where, b = what). If a is NULL the
+ * item is inserted at the end of the list: it becomes the head of an empty
+ * list, otherwise the tail is looked up by walking the list from the head.
+ * If a is the current head, b becomes the new head. b must not be NULL. */
+#define LIST_INSERT_BEFORE(name,head,a,b) \
+ do { \
+ typeof(*(head)) **_head = &(head), *_a = (a), *_b = (b); \
+ assert(_b); \
+ if (!_a) { \
+ if (!*_head) { \
+ _b->name##_next = NULL; \
+ _b->name##_prev = NULL; \
+ *_head = _b; \
+ } else { \
+ typeof(*(head)) *_tail = (head); \
+ while (_tail->name##_next) \
+ _tail = _tail->name##_next; \
+ _b->name##_next = NULL; \
+ _b->name##_prev = _tail; \
+ _tail->name##_next = _b; \
+ } \
+ } else { \
+ if ((_b->name##_prev = _a->name##_prev)) \
+ _b->name##_prev->name##_next = _b; \
+ else \
+ *_head = _b; \
+ _b->name##_next = _a; \
+ _a->name##_prev = _b; \
+ } \
+ } while (false)
+
+/* True if the item has neither a predecessor nor a successor */
+#define LIST_JUST_US(name,item) \
+ (!(item)->name##_prev && !(item)->name##_next) \
+
+/* Iterate through the list from head to tail, with i pointing to the
+ * current item */
+#define LIST_FOREACH(name,i,head) \
+ for ((i) = (head); (i); (i) = (i)->name##_next)
+
+/* Like LIST_FOREACH(), but the successor is stored in n before the loop body
+ * runs, so that the body may remove the current item from the list */
+#define LIST_FOREACH_SAFE(name,i,n,head) \
+ for ((i) = (head); (i) && (((n) = (i)->name##_next), 1); (i) = (n))
+
+/* Iterate backwards through all items before p, starting with p's
+ * predecessor */
+#define LIST_FOREACH_BEFORE(name,i,p) \
+ for ((i) = (p)->name##_prev; (i); (i) = (i)->name##_prev)
+
+/* Iterate forwards through all items after p, starting with p's successor */
+#define LIST_FOREACH_AFTER(name,i,p) \
+ for ((i) = (p)->name##_next; (i); (i) = (i)->name##_next)
+
+/* Iterate through all the members of the list p is included in, but skip over p.
+ * The head of the list is found by following the prev pointers from p. */
+#define LIST_FOREACH_OTHERS(name,i,p) \
+ for (({ \
+ (i) = (p); \
+ while ((i) && (i)->name##_prev) \
+ (i) = (i)->name##_prev; \
+ if ((i) == (p)) \
+ (i) = (p)->name##_next; \
+ }); \
+ (i); \
+ (i) = (i)->name##_next == (p) ? (p)->name##_next : (i)->name##_next)
+
+/* Loop starting from p->next until p->prev, wrapping around from the tail
+ to the head of the list. p can be adjusted meanwhile. */
+#define LIST_LOOP_BUT_ONE(name,i,head,p) \
+ for ((i) = (p)->name##_next ? (p)->name##_next : (head); \
+ (i) != (p); \
+ (i) = (i)->name##_next ? (i)->name##_next : (head))
+
+/* True if the list has no items, i.e. the head pointer is NULL */
+#define LIST_IS_EMPTY(head) \
+ (!(head))
diff --git a/src/basic/locale-util.c b/src/basic/locale-util.c
new file mode 100644
index 0000000..fc1577a
--- /dev/null
+++ b/src/basic/locale-util.c
@@ -0,0 +1,460 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <ftw.h>
+#include <langinfo.h>
+#include <libintl.h>
+#include <locale.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "def.h"
+#include "dirent-util.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "locale-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+/* Adds the names of all locales listed in the name hash table of /usr/lib/locale/locale-archive to the
+ * set. Names that are not valid UTF-8 are skipped. Returns 0 on success and also if the archive does not
+ * exist, -EBADMSG if the file is not a regular file, is too short or fails the header checks, -ENOMEM on
+ * allocation failure, and -errno for failing system calls. */
+static int add_locales_from_archive(Set *locales) {
+        /* Stolen from glibc... */
+
+        struct locarhead {
+                uint32_t magic;
+                /* Serial number. */
+                uint32_t serial;
+                /* Name hash table. */
+                uint32_t namehash_offset;
+                uint32_t namehash_used;
+                uint32_t namehash_size;
+                /* String table. */
+                uint32_t string_offset;
+                uint32_t string_used;
+                uint32_t string_size;
+                /* Table with locale records. */
+                uint32_t locrectab_offset;
+                uint32_t locrectab_used;
+                uint32_t locrectab_size;
+                /* MD5 sum hash table. */
+                uint32_t sumhash_offset;
+                uint32_t sumhash_used;
+                uint32_t sumhash_size;
+        };
+
+        struct namehashent {
+                /* Hash value of the name. */
+                uint32_t hashval;
+                /* Offset of the name in the string table. */
+                uint32_t name_offset;
+                /* Offset of the locale record. */
+                uint32_t locrec_offset;
+        };
+
+        const struct locarhead *h;
+        const struct namehashent *e;
+        const void *p = MAP_FAILED;
+        _cleanup_close_ int fd = -1;
+        struct stat st;
+        size_t i;
+        int r;
+
+        fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
+        if (fd < 0)
+                return errno == ENOENT ? 0 : -errno;
+
+        if (fstat(fd, &st) < 0)
+                return -errno;
+
+        if (!S_ISREG(st.st_mode))
+                return -EBADMSG;
+
+        if (st.st_size < (off_t) sizeof(struct locarhead))
+                return -EBADMSG;
+
+        p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+        if (p == MAP_FAILED)
+                return -errno;
+
+        /* NOTE(review): the checks below only compare offset + size fields against the file size. They
+         * neither account for the size of one hash table entry nor validate the per-entry name offsets
+         * used in the loop, i.e. the archive is trusted to be well-formed. */
+        h = (const struct locarhead *) p;
+        if (h->magic != 0xde020109 ||
+            h->namehash_offset + h->namehash_size > st.st_size ||
+            h->string_offset + h->string_size > st.st_size ||
+            h->locrectab_offset + h->locrectab_size > st.st_size ||
+            h->sumhash_offset + h->sumhash_size > st.st_size) {
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
+        for (i = 0; i < h->namehash_size; i++) {
+                char *z;
+
+                /* Skip entries that have no locale record offset set */
+                if (e[i].locrec_offset == 0)
+                        continue;
+
+                if (!utf8_is_valid((char*) p + e[i].name_offset))
+                        continue;
+
+                z = strdup((char*) p + e[i].name_offset);
+                if (!z) {
+                        r = -ENOMEM;
+                        goto finish;
+                }
+
+                r = set_consume(locales, z);
+                if (r < 0)
+                        goto finish;
+        }
+
+        r = 0;
+
+ finish:
+        /* Unmap with the same length that was mapped, a zero length would make munmap() fail */
+        if (p != MAP_FAILED)
+                munmap((void*) p, st.st_size);
+
+        return r;
+}
+
+/* Adds the name of every directory entry of /usr/lib/locale that is itself a directory to the set.
+ * Returns 0 on success and also if the directory does not exist, -ENOMEM on allocation failure, and
+ * -errno or the set error otherwise. An -EEXIST result from the set is not treated as an error. */
+static int add_locales_from_libdir(Set *locales) {
+        _cleanup_closedir_ DIR *d = NULL;
+        struct dirent *de;
+
+        d = opendir("/usr/lib/locale");
+        if (!d) {
+                if (errno == ENOENT)
+                        return 0;
+
+                return -errno;
+        }
+
+        FOREACH_DIRENT(de, d, return -errno) {
+                char *name;
+                int r;
+
+                dirent_ensure_type(d, de);
+                if (de->d_type != DT_DIR)
+                        continue;
+
+                name = strdup(de->d_name);
+                if (!name)
+                        return -ENOMEM;
+
+                r = set_consume(locales, name);
+                if (r < 0 && r != -EEXIST)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Collects the locale names found in the locale archive and in the locale directory, and stores them as
+ * a sorted, newly allocated string list in *ret. Returns 0 on success, a negative error otherwise. */
+int get_locales(char ***ret) {
+        _cleanup_set_free_ Set *names = NULL;
+        _cleanup_strv_free_ char **list = NULL;
+        int r;
+
+        names = set_new(&string_hash_ops);
+        if (!names)
+                return -ENOMEM;
+
+        /* An -ENOENT result from the archive is tolerated, the directory scan below still runs */
+        r = add_locales_from_archive(names);
+        if (r < 0 && r != -ENOENT)
+                return r;
+
+        r = add_locales_from_libdir(names);
+        if (r < 0)
+                return r;
+
+        list = set_get_strv(names);
+        if (!list)
+                return -ENOMEM;
+
+        strv_sort(list);
+
+        *ret = TAKE_PTR(list);
+        return 0;
+}
+
+/* Returns true if name is acceptable as a locale name: non-empty, shorter than 128 bytes, valid UTF-8,
+ * a valid file name, and a "safe" string. The checks run in this order and stop at the first failure. */
+bool locale_is_valid(const char *name) {
+        return !isempty(name) &&
+                strlen(name) < 128 &&
+                utf8_is_valid(name) &&
+                filename_is_valid(name) &&
+                string_is_safe(name);
+}
+
+/* Sets all locale categories from the environment and selects GETTEXT_PACKAGE as the message domain. The
+ * return values of both calls are ignored. */
+void init_gettext(void) {
+ setlocale(LC_ALL, "");
+ textdomain(GETTEXT_PACKAGE);
+}
+
+/* Returns whether the locale configured in the environment uses UTF-8. The answer is determined on the
+ * first call (which invokes setlocale(LC_ALL, "")) and cached in a static variable afterwards. */
+bool is_locale_utf8(void) {
+        static int cached_answer = -1;
+        const char *codeset, *ctype;
+
+        if (cached_answer >= 0)
+                return (bool) cached_answer;
+
+        /* Note that we default to 'true' here, since today UTF-8 is pretty much supported everywhere:
+         * whenever one of the lookups below fails we assume UTF-8. */
+
+        if (!setlocale(LC_ALL, ""))
+                cached_answer = true;
+        else if (!(codeset = nl_langinfo(CODESET)))
+                cached_answer = true;
+        else if (streq(codeset, "UTF-8"))
+                cached_answer = true;
+        else if (!(ctype = setlocale(LC_CTYPE, NULL)))
+                cached_answer = true;
+        else
+                /* For LC_CTYPE=="C" return true, because CTYPE is effectively unset and everything can do
+                 * UTF-8 nowadays. But only do so if C was not requested explicitly via the environment. */
+                cached_answer =
+                        STR_IN_SET(ctype, "C", "POSIX") &&
+                        !getenv("LC_ALL") &&
+                        !getenv("LC_CTYPE") &&
+                        !getenv("LANG");
+
+        return (bool) cached_answer;
+}
+
+/* The set nftw_cb() adds keymap names to; set up by get_keymaps() before the directory walk */
+static thread_local Set *keymaps = NULL;
+
+/* nftw() callback: for every regular file whose name ends in ".map" or ".map.gz", adds the file's base
+ * name with that suffix removed to the keymaps set. Returns 0 to continue the walk, FTW_STOP if the name
+ * cannot be duplicated, and the negative set error otherwise (-EEXIST is not treated as an error). */
+static int nftw_cb(
+                const char *fpath,
+                const struct stat *sb,
+                int tflag,
+                struct FTW *ftwbuf) {
+
+        char *name, *suffix;
+        int r;
+
+        if (tflag != FTW_F)
+                return 0;
+
+        if (!(endswith(fpath, ".map") || endswith(fpath, ".map.gz")))
+                return 0;
+
+        name = strdup(basename(fpath));
+        if (!name)
+                return FTW_STOP;
+
+        /* Chop off the suffix. Both checks run one after the other, each on the result of the previous. */
+        suffix = endswith(name, ".map");
+        if (suffix)
+                *suffix = 0;
+
+        suffix = endswith(name, ".map.gz");
+        if (suffix)
+                *suffix = 0;
+
+        r = set_consume(keymaps, name);
+
+        return (r < 0 && r != -EEXIST) ? r : 0;
+}
+
+/* Walks all directories listed in KBD_KEYMAP_DIRS, collects the keymap names found there (see nftw_cb())
+ * and stores them as a sorted, newly allocated string list in *ret. Failures of the individual directory
+ * walks are only logged at debug level. Returns 0 on success, -ENOMEM on allocation failure, and -ENOENT
+ * if no keymap was found at all. */
+int get_keymaps(char ***ret) {
+        _cleanup_strv_free_ char **l = NULL;
+        const char *dir;
+        int r;
+
+        keymaps = set_new(&string_hash_ops);
+        if (!keymaps)
+                return -ENOMEM;
+
+        NULSTR_FOREACH(dir, KBD_KEYMAP_DIRS) {
+                r = nftw(dir, nftw_cb, 20, FTW_PHYS|FTW_ACTIONRETVAL);
+
+                /* NOTE(review): nftw_cb() returns FTW_STOP when strdup() fails, so the first message
+                 * looks misleading; a directory that cannot be walked is reported by nftw() as -1. Left
+                 * unchanged here, confirm the intended wording. */
+                if (r == FTW_STOP)
+                        log_debug("Directory not found %s", dir);
+                else if (r < 0)
+                        log_debug_errno(r, "Can't add keymap: %m");
+        }
+
+        l = set_get_strv(keymaps);
+        if (!l) {
+                set_free_free(keymaps);
+                keymaps = NULL; /* don't leave the thread-local pointing at freed memory */
+                return -ENOMEM;
+        }
+
+        /* Only the set itself is freed at this point, not the strings it contained */
+        set_free(keymaps);
+        keymaps = NULL; /* don't leave the thread-local pointing at freed memory */
+
+        if (strv_isempty(l))
+                return -ENOENT;
+
+        strv_sort(l);
+
+        *ret = TAKE_PTR(l);
+
+        return 0;
+}
+
+/* Returns true if name is acceptable as a keymap name: non-empty, shorter than 128 bytes, valid UTF-8,
+ * a valid file name, and a "safe" string. The checks run in this order and stop at the first failure. */
+bool keymap_is_valid(const char *name) {
+        return !isempty(name) &&
+                strlen(name) < 128 &&
+                utf8_is_valid(name) &&
+                filename_is_valid(name) &&
+                string_is_safe(name);
+}
+
+/* Returns whether emoji glyphs shall be used. If $SYSTEMD_EMOJI parses as a boolean that value is used;
+ * otherwise emojis are enabled if the locale is UTF-8 and $TERM is neither "dumb" nor "linux". The result
+ * is cached in a static variable. */
+static bool emoji_enabled(void) {
+        static int cached_emoji_enabled = -1;
+        int val;
+
+        if (cached_emoji_enabled >= 0)
+                return cached_emoji_enabled;
+
+        val = getenv_bool("SYSTEMD_EMOJI");
+        if (val >= 0)
+                cached_emoji_enabled = val;
+        else
+                cached_emoji_enabled =
+                        is_locale_utf8() &&
+                        !STRPTR_IN_SET(getenv("TERM"), "dumb", "linux");
+
+        return cached_emoji_enabled;
+}
+
+/* Returns the string to print for the given glyph code. Two variants of each glyph exist: an ASCII
+ * fallback and a UTF-8 one. For the smiley glyphs (codes from _SPECIAL_GLYPH_FIRST_SMILEY on) the UTF-8
+ * variant is picked if emoji_enabled() is true, for all other glyphs if is_locale_utf8() is true. The
+ * returned string is a constant and must not be freed. */
+const char *special_glyph(SpecialGlyph code) {
+
+        /* A list of a number of interesting unicode glyphs we can use to decorate our output. It's probably wise to be
+         * conservative here, and primarily stick to the glyphs defined in the eurlatgr font, so that display still
+         * works reasonably well on the Linux console. For details see:
+         *
+         * http://git.altlinux.org/people/legion/packages/kbd.git?p=kbd.git;a=blob;f=data/consolefonts/README.eurlatgr
+         */
+
+        static const char* const draw_table[2][_SPECIAL_GLYPH_MAX] = {
+                /* ASCII fallback: every string in this half must consist of plain ASCII characters only */
+                [false] = {
+                        [SPECIAL_GLYPH_TREE_VERTICAL] = "| ",
+                        [SPECIAL_GLYPH_TREE_BRANCH] = "|-",
+                        [SPECIAL_GLYPH_TREE_RIGHT] = "`-",
+                        [SPECIAL_GLYPH_TREE_SPACE] = " ",
+                        [SPECIAL_GLYPH_TRIANGULAR_BULLET] = ">",
+                        [SPECIAL_GLYPH_BLACK_CIRCLE] = "*",
+                        [SPECIAL_GLYPH_BULLET] = "*",
+                        [SPECIAL_GLYPH_ARROW] = "->",
+                        [SPECIAL_GLYPH_MDASH] = "-",
+                        [SPECIAL_GLYPH_ELLIPSIS] = "...",
+                        [SPECIAL_GLYPH_MU] = "u",
+                        [SPECIAL_GLYPH_CHECK_MARK] = "+",
+                        [SPECIAL_GLYPH_CROSS_MARK] = "-",
+                        [SPECIAL_GLYPH_ECSTATIC_SMILEY] = ":-]",
+                        [SPECIAL_GLYPH_HAPPY_SMILEY] = ":-}",
+                        [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = ":-)",
+                        [SPECIAL_GLYPH_NEUTRAL_SMILEY] = ":-|",
+                        [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = ":-(",
+                        [SPECIAL_GLYPH_UNHAPPY_SMILEY] = ":-{",
+                        [SPECIAL_GLYPH_DEPRESSED_SMILEY] = ":-[",
+                },
+
+                /* UTF-8, spelled out as octal escapes so that the source file itself stays ASCII */
+                [true] = {
+                        [SPECIAL_GLYPH_TREE_VERTICAL] = "\342\224\202 ", /* │ */
+                        [SPECIAL_GLYPH_TREE_BRANCH] = "\342\224\234\342\224\200", /* ├─ */
+                        [SPECIAL_GLYPH_TREE_RIGHT] = "\342\224\224\342\224\200", /* └─ */
+                        [SPECIAL_GLYPH_TREE_SPACE] = " ", /* */
+                        [SPECIAL_GLYPH_TRIANGULAR_BULLET] = "\342\200\243", /* ‣ */
+                        [SPECIAL_GLYPH_BLACK_CIRCLE] = "\342\227\217", /* ● */
+                        [SPECIAL_GLYPH_BULLET] = "\342\200\242", /* • */
+                        [SPECIAL_GLYPH_ARROW] = "\342\206\222", /* → */
+                        [SPECIAL_GLYPH_MDASH] = "\342\200\223", /* – */
+                        [SPECIAL_GLYPH_ELLIPSIS] = "\342\200\246", /* … */
+                        [SPECIAL_GLYPH_MU] = "\316\274", /* μ */
+                        [SPECIAL_GLYPH_CHECK_MARK] = "\342\234\223", /* ✓ */
+                        [SPECIAL_GLYPH_CROSS_MARK] = "\342\234\227", /* ✗ */
+                        [SPECIAL_GLYPH_ECSTATIC_SMILEY] = "\360\237\230\207", /* 😇 */
+                        [SPECIAL_GLYPH_HAPPY_SMILEY] = "\360\237\230\200", /* 😀 */
+                        [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = "\360\237\231\202", /* 🙂 */
+                        [SPECIAL_GLYPH_NEUTRAL_SMILEY] = "\360\237\230\220", /* 😐 */
+                        [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = "\360\237\231\201", /* 🙁 */
+                        [SPECIAL_GLYPH_UNHAPPY_SMILEY] = "\360\237\230\250", /* 😨 */
+                        [SPECIAL_GLYPH_DEPRESSED_SMILEY] = "\360\237\244\242", /* 🤢 */
+                },
+        };
+
+        assert(code < _SPECIAL_GLYPH_MAX);
+
+        /* The first index is a boolean: false selects the ASCII row, true the UTF-8 row */
+        return draw_table[code >= _SPECIAL_GLYPH_FIRST_SMILEY ? emoji_enabled() : is_locale_utf8()][code];
+}
+
+/* Frees every string of the array l (which has _VARIABLE_LC_MAX entries) and resets each entry to NULL.
+ * The array itself is not freed. A NULL array is accepted and ignored. */
+void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
+        LocaleVariable v;
+
+        if (!l)
+                return;
+
+        for (v = 0; v < _VARIABLE_LC_MAX; v++) {
+                free(l[v]);
+                l[v] = NULL;
+        }
+}
+
+/* Maps each LocaleVariable value to the name of the corresponding environment variable. The table is
+ * handed to DEFINE_STRING_TABLE_LOOKUP() below; NOTE(review): presumably that macro generates the
+ * locale_variable_to_string()/locale_variable_from_string() functions declared in locale-util.h —
+ * confirm in string-table.h. */
+static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
+ [VARIABLE_LANG] = "LANG",
+ [VARIABLE_LANGUAGE] = "LANGUAGE",
+ [VARIABLE_LC_CTYPE] = "LC_CTYPE",
+ [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
+ [VARIABLE_LC_TIME] = "LC_TIME",
+ [VARIABLE_LC_COLLATE] = "LC_COLLATE",
+ [VARIABLE_LC_MONETARY] = "LC_MONETARY",
+ [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
+ [VARIABLE_LC_PAPER] = "LC_PAPER",
+ [VARIABLE_LC_NAME] = "LC_NAME",
+ [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
+ [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
+ [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
+ [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);
diff --git a/src/basic/locale-util.h b/src/basic/locale-util.h
new file mode 100644
index 0000000..e64f0ce
--- /dev/null
+++ b/src/basic/locale-util.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <libintl.h>
+#include <stdbool.h>
+#include <locale.h>
+
+#include "macro.h"
+
+/* Identifies one of the locale settings. The values are consecutive, starting at 0. */
+typedef enum LocaleVariable {
+ /* We don't list LC_ALL here on purpose. People should be
+ * using LANG instead. */
+
+ VARIABLE_LANG,
+ VARIABLE_LANGUAGE,
+ VARIABLE_LC_CTYPE,
+ VARIABLE_LC_NUMERIC,
+ VARIABLE_LC_TIME,
+ VARIABLE_LC_COLLATE,
+ VARIABLE_LC_MONETARY,
+ VARIABLE_LC_MESSAGES,
+ VARIABLE_LC_PAPER,
+ VARIABLE_LC_NAME,
+ VARIABLE_LC_ADDRESS,
+ VARIABLE_LC_TELEPHONE,
+ VARIABLE_LC_MEASUREMENT,
+ VARIABLE_LC_IDENTIFICATION,
+ _VARIABLE_LC_MAX, /* number of settings above; used as array size, e.g. by locale_variables_free() */
+ _VARIABLE_LC_INVALID = -1 /* NOTE(review): presumably the "no such variable" marker — confirm */
+} LocaleVariable;
+
+int get_locales(char ***l);
+bool locale_is_valid(const char *name);
+
+#define _(String) gettext(String)
+#define N_(String) String
+void init_gettext(void);
+
+bool is_locale_utf8(void);
+
+/* Codes of the glyphs that can be requested from special_glyph(). */
+typedef enum {
+ SPECIAL_GLYPH_TREE_VERTICAL,
+ SPECIAL_GLYPH_TREE_BRANCH,
+ SPECIAL_GLYPH_TREE_RIGHT,
+ SPECIAL_GLYPH_TREE_SPACE,
+ SPECIAL_GLYPH_TRIANGULAR_BULLET,
+ SPECIAL_GLYPH_BLACK_CIRCLE,
+ SPECIAL_GLYPH_BULLET,
+ SPECIAL_GLYPH_ARROW,
+ SPECIAL_GLYPH_MDASH,
+ SPECIAL_GLYPH_ELLIPSIS,
+ SPECIAL_GLYPH_MU,
+ SPECIAL_GLYPH_CHECK_MARK,
+ SPECIAL_GLYPH_CROSS_MARK,
+ /* Marker with the same value as the first smiley code below; all codes from here on are smileys */
+ _SPECIAL_GLYPH_FIRST_SMILEY,
+ SPECIAL_GLYPH_ECSTATIC_SMILEY = _SPECIAL_GLYPH_FIRST_SMILEY,
+ SPECIAL_GLYPH_HAPPY_SMILEY,
+ SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY,
+ SPECIAL_GLYPH_NEUTRAL_SMILEY,
+ SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY,
+ SPECIAL_GLYPH_UNHAPPY_SMILEY,
+ SPECIAL_GLYPH_DEPRESSED_SMILEY,
+ _SPECIAL_GLYPH_MAX /* number of glyph codes */
+} SpecialGlyph;
+
+const char *special_glyph(SpecialGlyph code) _const_;
+
+const char* locale_variable_to_string(LocaleVariable i) _const_;
+LocaleVariable locale_variable_from_string(const char *s) _pure_;
+
+int get_keymaps(char ***l);
+bool keymap_is_valid(const char *name);
+
+/* Cleanup helper: calls freelocale() on the locale object p points to, unless it is unset, i.e. equal to
+ * (locale_t) 0. */
+static inline void freelocalep(locale_t *p) {
+        if (*p != (locale_t) 0)
+                freelocale(*p);
+}
+
+void locale_variables_free(char* l[_VARIABLE_LC_MAX]);
+/* Cleanup helper taking a pointer to the array: calls locale_variables_free() on the array l points to. */
+static inline void locale_variables_freep(char*(*l)[_VARIABLE_LC_MAX]) {
+ locale_variables_free(*l);
+}
diff --git a/src/basic/log.c b/src/basic/log.c
new file mode 100644
index 0000000..0486027
--- /dev/null
+++ b/src/basic/log.c
@@ -0,0 +1,1324 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+/* Currently selected log target; starts out as the console. */
+static LogTarget log_target = LOG_TARGET_CONSOLE;
+/* Maximum level that gets logged, one slot per log realm (the assert below keeps the slot count in sync). */
+static int log_max_level[] = {LOG_INFO, LOG_INFO};
+assert_cc(ELEMENTSOF(log_max_level) == _LOG_REALM_MAX);
+/* Facility that is patched into levels that carry no facility bits. */
+static int log_facility = LOG_DAEMON;
+
+/* File descriptors of the individual log sinks; a negative value means "not open". */
+static int console_fd = STDERR_FILENO;
+static int syslog_fd = -1;
+static int kmsg_fd = -1;
+static int journal_fd = -1;
+
+/* Set when syslog_fd had to be connected as a stream socket rather than as a datagram socket. */
+static bool syslog_is_stream = false;
+
+/* Console output options: highlight errors in color, prefix messages with the code location. */
+static bool show_color = false;
+static bool show_location = false;
+
+/* Policy switches, each changed through the matching log_set_*() function. */
+static bool upgrade_syslog_to_journal = false;
+static bool always_reopen_console = false;
+static bool open_when_needed = false;
+static bool prohibit_ipc = false;
+
+/* Akin to glibc's __abort_msg; which is private and we hence cannot
+ * use here. */
+static char *log_abort_msg = NULL;
+
+/* An assert to use in logging functions that does not call recursively
+ * into our logging functions (since that might lead to a loop). */
+#define assert_raw(expr) \
+ do { \
+ if (_unlikely_(!(expr))) { \
+ fputs(#expr "\n", stderr); \
+ abort(); \
+ } \
+ } while (false)
+
+/* Closes the console fd through safe_close_above_stdio() and stores that call's result in console_fd. */
+static void log_close_console(void) {
+ console_fd = safe_close_above_stdio(console_fd);
+}
+
+/* Selects the fd used for console logging. Unless always_reopen_console is set this is simply stderr.
+ * Otherwise /dev/console is opened, but only if console_fd is not already an fd >= 3. Returns 0 on
+ * success, or the negative value returned by open_terminal() on failure. */
+static int log_open_console(void) {
+
+        if (!always_reopen_console) {
+                console_fd = STDERR_FILENO;
+                return 0;
+        }
+
+        /* Already pointing at an fd outside of the stdio range, nothing to open. */
+        if (console_fd >= 3)
+                return 0;
+
+        console_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
+        if (console_fd < 0)
+                return console_fd;
+
+        console_fd = fd_move_above_stdio(console_fd);
+        return 0;
+}
+
+/* Closes the kmsg fd through safe_close() and stores that call's result in kmsg_fd. */
+static void log_close_kmsg(void) {
+ kmsg_fd = safe_close(kmsg_fd);
+}
+
+/* Opens /dev/kmsg for writing unless kmsg_fd is open already. Returns 0 on success, -errno if open()
+ * fails. */
+static int log_open_kmsg(void) {
+
+        if (kmsg_fd < 0) {
+                kmsg_fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
+                if (kmsg_fd < 0)
+                        return -errno;
+
+                kmsg_fd = fd_move_above_stdio(kmsg_fd);
+        }
+
+        return 0;
+}
+
+/* Closes the syslog fd through safe_close() and stores that call's result in syslog_fd. */
+static void log_close_syslog(void) {
+ syslog_fd = safe_close(syslog_fd);
+}
+
+/* Creates an AF_UNIX socket of the given type (plus SOCK_CLOEXEC) for use as a log sink, tries to enlarge
+ * its send buffer to SNDBUF_SIZE and configures a send timeout. Returns the fd, or -errno if socket()
+ * fails; failures of the tuning steps are ignored. */
+static int create_log_socket(int type) {
+        struct timeval timeout;
+        int sock;
+
+        sock = socket(AF_UNIX, type|SOCK_CLOEXEC, 0);
+        if (sock < 0)
+                return -errno;
+
+        sock = fd_move_above_stdio(sock);
+        (void) fd_inc_sndbuf(sock, SNDBUF_SIZE);
+
+        /* The fd has to stay blocking, since otherwise messages would be lost way too early. To avoid
+         * hanging forever in the unlikely case of a deadlock a send timeout is set: a much shorter one
+         * for PID 1 than for everybody else. */
+        timeval_store(&timeout, getpid_cached() == 1 ? 10 * USEC_PER_MSEC : 10 * USEC_PER_SEC);
+        (void) setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(timeout));
+
+        return sock;
+}
+
+/* Connects syslog_fd to /dev/log unless it is open already. A datagram socket is tried first; if
+ * connecting that fails, a stream socket is tried instead. syslog_is_stream records which kind is in use.
+ * Returns 0 on success, a negative errno-style value on failure (the fd is closed again in that case). */
+static int log_open_syslog(void) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/dev/log",
+ };
+
+ int r;
+
+ if (syslog_fd >= 0)
+ return 0;
+
+ syslog_fd = create_log_socket(SOCK_DGRAM);
+ if (syslog_fd < 0) {
+ r = syslog_fd;
+ goto fail;
+ }
+
+ if (connect(syslog_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
+ /* Drop the datagram socket; syslog_fd is overwritten right below. */
+ safe_close(syslog_fd);
+
+ /* Some legacy syslog systems still use stream
+ * sockets. They really shouldn't. But what can we
+ * do... */
+ syslog_fd = create_log_socket(SOCK_STREAM);
+ if (syslog_fd < 0) {
+ r = syslog_fd;
+ goto fail;
+ }
+
+ if (connect(syslog_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ syslog_is_stream = true;
+ } else
+ syslog_is_stream = false;
+
+ return 0;
+
+fail:
+ log_close_syslog();
+ return r;
+}
+
+/* Closes the journal fd through safe_close() and stores that call's result in journal_fd. */
+static void log_close_journal(void) {
+ journal_fd = safe_close(journal_fd);
+}
+
+/* Connects journal_fd to the journal's datagram socket unless it is open already. Returns 0 on success,
+ * a negative errno-style value on failure (the fd is closed again in that case). */
+static int log_open_journal(void) {
+
+        static const union sockaddr_union sa = {
+                .un.sun_family = AF_UNIX,
+                .un.sun_path = "/run/systemd/journal/socket",
+        };
+
+        int r;
+
+        if (journal_fd >= 0)
+                return 0;
+
+        journal_fd = create_log_socket(SOCK_DGRAM);
+        if (journal_fd < 0)
+                r = journal_fd;
+        else if (connect(journal_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
+                r = -errno;
+        else
+                return 0;
+
+        /* Either step failed: make sure no half-set-up fd stays around. */
+        log_close_journal();
+        return r;
+}
+
+/* Opens the sink that matches the configured log target and closes the sinks that are not needed.
+ * As far as the target permits, journal, syslog and kmsg are tried in that order; if none of them is
+ * applicable or could be opened, the console is used. Returns the result of the open call that ended up
+ * being used (0 right away for LOG_TARGET_NULL). */
+int log_open(void) {
+ int r;
+
+ /* Do not call from library code. */
+
+ /* If we don't use the console we close it here, to not get
+ * killed by SAK. If we don't use syslog we close it here so
+ * that we are not confused by somebody deleting the socket in
+ * the fs, and to make sure we don't use it if prohibit_ipc is
+ * set. If we don't use /dev/kmsg we still keep it open,
+ * because there is no reason to close it. */
+
+ if (log_target == LOG_TARGET_NULL) {
+ log_close_journal();
+ log_close_syslog();
+ log_close_console();
+ return 0;
+ }
+
+ /* With LOG_TARGET_AUTO the non-console sinks are only considered for PID 1 or if stderr is not a tty. */
+ if (log_target != LOG_TARGET_AUTO ||
+ getpid_cached() == 1 ||
+ isatty(STDERR_FILENO) <= 0) {
+
+ if (!prohibit_ipc &&
+ IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_JOURNAL)) {
+ r = log_open_journal();
+ if (r >= 0) {
+ log_close_syslog();
+ log_close_console();
+ return r;
+ }
+ }
+
+ if (!prohibit_ipc &&
+ IN_SET(log_target, LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_SYSLOG)) {
+ r = log_open_syslog();
+ if (r >= 0) {
+ log_close_journal();
+ log_close_console();
+ return r;
+ }
+ }
+
+ if (IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_KMSG)) {
+ r = log_open_kmsg();
+ if (r >= 0) {
+ log_close_journal();
+ log_close_syslog();
+ log_close_console();
+ return r;
+ }
+ }
+ }
+
+ /* Nothing else was opened: fall back to the console. */
+ log_close_journal();
+ log_close_syslog();
+
+ return log_open_console();
+}
+
+/* Selects the log target. If upgrade_syslog_to_journal is set, the two syslog targets are replaced by
+ * their journal counterparts. The target must be a valid, non-negative LogTarget value (asserted). */
+void log_set_target(LogTarget target) {
+        assert(target >= 0);
+        assert(target < _LOG_TARGET_MAX);
+
+        if (upgrade_syslog_to_journal)
+                switch (target) {
+
+                case LOG_TARGET_SYSLOG:
+                        target = LOG_TARGET_JOURNAL;
+                        break;
+
+                case LOG_TARGET_SYSLOG_OR_KMSG:
+                        target = LOG_TARGET_JOURNAL_OR_KMSG;
+                        break;
+
+                default:
+                        break;
+                }
+
+        log_target = target;
+}
+
+/* Closes all four log sinks: journal, syslog, kmsg and console. */
+void log_close(void) {
+ /* Do not call from library code. */
+
+ log_close_journal();
+ log_close_syslog();
+ log_close_kmsg();
+ log_close_console();
+}
+
+/* Resets all four sink fd variables to -1 without closing the fds they referred to. */
+void log_forget_fds(void) {
+ /* Do not call from library code. */
+
+ console_fd = kmsg_fd = syslog_fd = journal_fd = -1;
+}
+
+/* Sets the maximum log level of the given realm. The level must consist of priority bits only and the
+ * realm must be a valid index into log_max_level[]; both are enforced with assert(). */
+void log_set_max_level_realm(LogRealm realm, int level) {
+ assert((level & LOG_PRIMASK) == level);
+ assert(realm < ELEMENTSOF(log_max_level));
+
+ log_max_level[realm] = level;
+}
+
+/* Sets the facility that is OR-ed into log levels that carry no facility bits of their own. */
+void log_set_facility(int facility) {
+ log_facility = facility;
+}
+
+/* Writes one message line to the console fd. Depending on the configuration the line is preceded by a
+ * "<level>" prefix and/or the "(file:line) " location, and wrapped in color sequences for messages of
+ * priority LOG_ERR or more severe. Returns 0 if no console fd is available, 1 if the line was written,
+ * -errno if writing failed. The error and func parameters are not used here. */
+static int write_to_console(
+                int level,
+                int error,
+                const char *file,
+                int line,
+                const char *func,
+                const char *buffer) {
+
+        char location[256], prefix[1 + DECIMAL_STR_MAX(int) + 2];
+        struct iovec iovec[6] = {};
+        size_t n = 0;
+        bool highlight;
+
+        if (console_fd < 0)
+                return 0;
+
+        if (log_target == LOG_TARGET_CONSOLE_PREFIXED) {
+                xsprintf(prefix, "<%i>", level);
+                iovec[n++] = IOVEC_MAKE_STRING(prefix);
+        }
+
+        if (show_location) {
+                (void) snprintf(location, sizeof location, "(%s:%i) ", file, line);
+                iovec[n++] = IOVEC_MAKE_STRING(location);
+        }
+
+        highlight = show_color && LOG_PRI(level) <= LOG_ERR;
+        if (highlight) {
+                iovec[n++] = IOVEC_MAKE_STRING(ANSI_HIGHLIGHT_RED);
+                iovec[n++] = IOVEC_MAKE_STRING(buffer);
+                iovec[n++] = IOVEC_MAKE_STRING(ANSI_NORMAL);
+        } else
+                iovec[n++] = IOVEC_MAKE_STRING(buffer);
+
+        iovec[n++] = IOVEC_MAKE_STRING("\n");
+
+        if (writev(console_fd, iovec, n) >= 0)
+                return 1;
+
+        /* Only PID 1 retries, and only on EIO. */
+        if (errno != EIO || getpid_cached() != 1)
+                return -errno;
+
+        /* If somebody tried to kick us from our console tty (via vhangup() or suchlike), try to
+         * reconnect */
+        log_close_console();
+        log_open_console();
+
+        if (console_fd < 0)
+                return 0;
+
+        if (writev(console_fd, iovec, n) < 0)
+                return -errno;
+
+        return 1;
+}
+
+/* Sends one message to the syslog socket, preceded by a "<level>" priority header, a timestamp, the
+ * program name and the PID. Returns 0 if the syslog fd is not open, 1 once the message was sent,
+ * -EINVAL if the timestamp cannot be generated, or -errno if sendmsg() fails. The error, file, line and
+ * func parameters are not used here. */
+static int write_to_syslog(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *buffer) {
+
+ char header_priority[2 + DECIMAL_STR_MAX(int) + 1],
+ header_time[64],
+ header_pid[4 + DECIMAL_STR_MAX(pid_t) + 1];
+ struct iovec iovec[5] = {};
+ struct msghdr msghdr = {
+ .msg_iov = iovec,
+ .msg_iovlen = ELEMENTSOF(iovec),
+ };
+ time_t t;
+ struct tm tm;
+
+ if (syslog_fd < 0)
+ return 0;
+
+ xsprintf(header_priority, "<%i>", level);
+
+ t = (time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC);
+ if (!localtime_r(&t, &tm))
+ return -EINVAL;
+
+ if (strftime(header_time, sizeof(header_time), "%h %e %T ", &tm) <= 0)
+ return -EINVAL;
+
+ xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
+
+ iovec[0] = IOVEC_MAKE_STRING(header_priority);
+ iovec[1] = IOVEC_MAKE_STRING(header_time);
+ iovec[2] = IOVEC_MAKE_STRING(program_invocation_short_name);
+ iovec[3] = IOVEC_MAKE_STRING(header_pid);
+ iovec[4] = IOVEC_MAKE_STRING(buffer);
+
+ /* When using syslog via SOCK_STREAM separate the messages by NUL chars */
+ if (syslog_is_stream)
+ iovec[4].iov_len++;
+
+ /* For datagram sockets one sendmsg() call is all there is. For stream sockets the call is repeated,
+ * with the iovec advanced by the amount already sent, until everything has been transmitted. */
+ for (;;) {
+ ssize_t n;
+
+ n = sendmsg(syslog_fd, &msghdr, MSG_NOSIGNAL);
+ if (n < 0)
+ return -errno;
+
+ if (!syslog_is_stream ||
+ (size_t) n >= IOVEC_TOTAL_SIZE(iovec, ELEMENTSOF(iovec)))
+ break;
+
+ IOVEC_INCREMENT(iovec, ELEMENTSOF(iovec), n);
+ }
+
+ return 1;
+}
+
+/* Writes one message line to the kmsg fd, preceded by a "<level>" priority header, the program name and
+ * the PID. Returns 0 if the kmsg fd is not open, 1 if the line was written, -errno if writev() failed.
+ * The error, file, line and func parameters are not used here. */
+static int write_to_kmsg(
+                int level,
+                int error,
+                const char *file,
+                int line,
+                const char *func,
+                const char *buffer) {
+
+        char header_pid[4 + DECIMAL_STR_MAX(pid_t) + 1],
+             header_priority[2 + DECIMAL_STR_MAX(int) + 1];
+        struct iovec iovec[5] = {};
+
+        if (kmsg_fd < 0)
+                return 0;
+
+        xsprintf(header_priority, "<%i>", level);
+        iovec[0] = IOVEC_MAKE_STRING(header_priority);
+
+        iovec[1] = IOVEC_MAKE_STRING(program_invocation_short_name);
+
+        xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
+        iovec[2] = IOVEC_MAKE_STRING(header_pid);
+
+        iovec[3] = IOVEC_MAKE_STRING(buffer);
+        iovec[4] = IOVEC_MAKE_STRING("\n");
+
+        return writev(kmsg_fd, iovec, ELEMENTSOF(iovec)) < 0 ? -errno : 1;
+}
+
+/* Formats the metadata fields that precede a journal message into header (size bytes large): PRIORITY=,
+ * SYSLOG_FACILITY=, then CODE_FILE=, CODE_LINE=, CODE_FUNC=, ERRNO=, the object field and the extra
+ * field, and finally SYSLOG_IDENTIFIER=. Each of the fields in the middle is left out if its value is
+ * empty or zero. object_field and extra_field are emitted verbatim in front of their values.
+ * Aborts through assert_raw() if the result does not fit into the buffer. Always returns 0. */
+static int log_do_header(
+ char *header,
+ size_t size,
+ int level,
+ int error,
+ const char *file, int line, const char *func,
+ const char *object_field, const char *object,
+ const char *extra_field, const char *extra) {
+ int r;
+
+ /* Synthetic errors are not reported as ERRNO=; everything else is reduced to its plain errno value. */
+ error = IS_SYNTHETIC_ERRNO(error) ? 0 : ERRNO_VALUE(error);
+
+ r = snprintf(header, size,
+ "PRIORITY=%i\n"
+ "SYSLOG_FACILITY=%i\n"
+ "%s%.256s%s" /* CODE_FILE */
+ "%s%.*i%s" /* CODE_LINE */
+ "%s%.256s%s" /* CODE_FUNC */
+ "%s%.*i%s" /* ERRNO */
+ "%s%.256s%s" /* object */
+ "%s%.256s%s" /* extra */
+ "SYSLOG_IDENTIFIER=%.256s\n",
+ LOG_PRI(level),
+ LOG_FAC(level),
+ isempty(file) ? "" : "CODE_FILE=",
+ isempty(file) ? "" : file,
+ isempty(file) ? "" : "\n",
+ line ? "CODE_LINE=" : "",
+ line ? 1 : 0, line, /* %.0d means no output too, special case for 0 */
+ line ? "\n" : "",
+ isempty(func) ? "" : "CODE_FUNC=",
+ isempty(func) ? "" : func,
+ isempty(func) ? "" : "\n",
+ error ? "ERRNO=" : "",
+ error ? 1 : 0, error,
+ error ? "\n" : "",
+ isempty(object) ? "" : object_field,
+ isempty(object) ? "" : object,
+ isempty(object) ? "" : "\n",
+ isempty(extra) ? "" : extra_field,
+ isempty(extra) ? "" : extra,
+ isempty(extra) ? "" : "\n",
+ program_invocation_short_name);
+ assert_raw((size_t) r < size);
+
+ return 0;
+}
+
+/* Sends one message to the journal socket: the header generated by log_do_header(), followed by
+ * "MESSAGE=", the message text and a newline. Returns 0 if the journal fd is not open, 1 if the message
+ * was sent, -errno if sendmsg() failed. */
+static int write_to_journal(
+                int level,
+                int error,
+                const char *file,
+                int line,
+                const char *func,
+                const char *object_field,
+                const char *object,
+                const char *extra_field,
+                const char *extra,
+                const char *buffer) {
+
+        char header[LINE_MAX];
+        struct iovec iovec[4] = {};
+        struct msghdr mh = {
+                .msg_iov = iovec,
+                .msg_iovlen = ELEMENTSOF(iovec),
+        };
+
+        if (journal_fd < 0)
+                return 0;
+
+        log_do_header(header, sizeof(header), level, error, file, line, func, object_field, object, extra_field, extra);
+
+        iovec[0] = IOVEC_MAKE_STRING(header);
+        iovec[1] = IOVEC_MAKE_STRING("MESSAGE=");
+        iovec[2] = IOVEC_MAKE_STRING(buffer);
+        iovec[3] = IOVEC_MAKE_STRING("\n");
+
+        return sendmsg(journal_fd, &mh, MSG_NOSIGNAL) < 0 ? -errno : 1;
+}
+
+/* Delivers an already formatted message. The buffer is split at newline characters (it is modified in
+ * place) and every non-empty line is handed to the sink selected by log_target. If that sink did not take
+ * the line, kmsg is tried where the target allows it, and finally the console. With open_when_needed
+ * set, the sinks are opened before and closed after the call. Returns -ERRNO_VALUE(error). */
+int log_dispatch_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ char *buffer) {
+
+ assert_raw(buffer);
+
+ if (log_target == LOG_TARGET_NULL)
+ return -ERRNO_VALUE(error);
+
+ /* Patch in LOG_DAEMON facility if necessary */
+ if ((level & LOG_FACMASK) == 0)
+ level |= log_facility;
+
+ if (open_when_needed)
+ log_open();
+
+ do {
+ char *e;
+ int k = 0; /* result of the most recent write attempt: > 0 written, 0 sink not open, < 0 error */
+
+ /* Skip leading newline characters, so that empty lines are not logged. */
+ buffer += strspn(buffer, NEWLINE);
+
+ if (buffer[0] == 0)
+ break;
+
+ /* Terminate the current line; e points to the remainder, or is NULL if this is the last line. */
+ if ((e = strpbrk(buffer, NEWLINE)))
+ *(e++) = 0;
+
+ if (IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_JOURNAL)) {
+
+ k = write_to_journal(level, error, file, line, func, object_field, object, extra_field, extra, buffer);
+ if (k < 0 && k != -EAGAIN)
+ log_close_journal();
+ }
+
+ if (IN_SET(log_target, LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_SYSLOG)) {
+
+ k = write_to_syslog(level, error, file, line, func, buffer);
+ if (k < 0 && k != -EAGAIN)
+ log_close_syslog();
+ }
+
+ if (k <= 0 &&
+ IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_KMSG)) {
+
+ /* The preferred sink failed, hence make sure kmsg is available as replacement. */
+ if (k < 0)
+ log_open_kmsg();
+
+ k = write_to_kmsg(level, error, file, line, func, buffer);
+ if (k < 0) {
+ log_close_kmsg();
+ log_open_console();
+ }
+ }
+
+ /* Last resort: the console. */
+ if (k <= 0)
+ (void) write_to_console(level, error, file, line, func, buffer);
+
+ buffer = e;
+ } while (buffer);
+
+ if (open_when_needed)
+ log_close();
+
+ return -ERRNO_VALUE(error);
+}
+
+/* Logs an already formatted buffer, provided the level passes the maximum level of the realm encoded in
+ * it. The buffer is modified while being dispatched. Returns -ERRNO_VALUE(error). */
+int log_dump_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ char *buffer) {
+
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+ PROTECT_ERRNO;
+
+ /* This modifies the buffer... */
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]))
+ return -ERRNO_VALUE(error);
+
+ return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buffer);
+}
+
+/* Formats a message from format/ap into a buffer of LINE_MAX bytes (longer output is cut off by
+ * vsnprintf()) and dispatches it, provided the level passes the maximum level of the realm encoded in
+ * it. Returns -ERRNO_VALUE(error). */
+int log_internalv_realm(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format,
+ va_list ap) {
+
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+ char buffer[LINE_MAX];
+ PROTECT_ERRNO;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]))
+ return -ERRNO_VALUE(error);
+
+ /* Make sure that %m maps to the specified error (or "Success"). */
+ errno = ERRNO_VALUE(error);
+
+ (void) vsnprintf(buffer, sizeof buffer, format, ap);
+
+ return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buffer);
+}
+
+/* Variadic front end of log_internalv_realm(); returns whatever that function returns. */
+int log_internal_realm(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = log_internalv_realm(level, error, file, line, func, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+/* Formats and dispatches a message that relates to an object. If object is set, the message text is
+ * prefixed with "<object>: ". object_field/object and extra_field/extra are passed on to the dispatcher.
+ * The level is always checked against the maximum level of LOG_REALM_SYSTEMD. The message proper is
+ * limited to LINE_MAX bytes. Returns -ERRNO_VALUE(error). */
+_printf_(10,0)
+static int log_object_internalv(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ const char *format,
+ va_list ap) {
+
+ PROTECT_ERRNO;
+ char *buffer, *b;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[LOG_REALM_SYSTEMD]))
+ return -ERRNO_VALUE(error);
+
+ /* Make sure that %m maps to the specified error (or "Success"). */
+ errno = ERRNO_VALUE(error);
+
+ /* Prepend the object name before the message */
+ if (object) {
+ size_t n;
+
+ n = strlen(object);
+ /* Room for the object name, the ": " separator and the message; b points behind the separator. */
+ buffer = newa(char, n + 2 + LINE_MAX);
+ b = stpcpy(stpcpy(buffer, object), ": ");
+ } else
+ b = buffer = newa(char, LINE_MAX);
+
+ (void) vsnprintf(b, LINE_MAX, format, ap);
+
+ return log_dispatch_internal(level, error, file, line, func,
+ object_field, object, extra_field, extra, buffer);
+}
+
+/* Variadic front end of log_object_internalv(); returns whatever that function returns. */
+int log_object_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ const char *format, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = log_object_internalv(level, error, file, line, func, object_field, object, extra_field, extra, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+/* Common worker of the log_assert_failed_*() functions. Formats the message into a static buffer,
+ * stores a pointer to it in log_abort_msg and dispatches it. The format string is expected to consume
+ * text, file, line and func, in that order. Nothing happens if the level does not pass the maximum
+ * level of the realm encoded in it. */
+static void log_assert(
+ int level,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format) {
+
+ static char buffer[LINE_MAX];
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]))
+ return;
+
+ /* The format string is passed in by the callers in this file, hence silence the compiler warning. */
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ (void) snprintf(buffer, sizeof buffer, format, text, file, line, func);
+ REENABLE_WARNING;
+
+ log_abort_msg = buffer;
+
+ log_dispatch_internal(level, 0, file, line, func, NULL, NULL, NULL, NULL, buffer);
+}
+
+/* Reports a failed assertion at LOG_CRIT level (after opening the log sinks) and aborts the process. */
+_noreturn_ void log_assert_failed_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func) {
+ log_open();
+ log_assert(LOG_REALM_PLUS_LEVEL(realm, LOG_CRIT), text, file, line, func,
+ "Assertion '%s' failed at %s:%u, function %s(). Aborting.");
+ abort();
+}
+
+/* Reports at LOG_CRIT level (after opening the log sinks) that code was reached that should not have
+ * been, and aborts the process. */
+_noreturn_ void log_assert_failed_unreachable_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func) {
+ log_open();
+ log_assert(LOG_REALM_PLUS_LEVEL(realm, LOG_CRIT), text, file, line, func,
+ "Code should not be reached '%s' at %s:%u, function %s(). Aborting.");
+ abort();
+}
+
+/* Reports a failed assertion at LOG_DEBUG level and returns to the caller instead of aborting. */
+void log_assert_failed_return_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func) {
+ PROTECT_ERRNO;
+ log_assert(LOG_REALM_PLUS_LEVEL(realm, LOG_DEBUG), text, file, line, func,
+ "Assertion '%s' failed at %s:%u, function %s(). Ignoring.");
+}
+
+/* Logs "Out of memory." at LOG_ERR level in the given realm, with ENOMEM as error. Returns what
+ * log_internal_realm() returns for that error. */
+int log_oom_internal(LogRealm realm, const char *file, int line, const char *func) {
+ return log_internal_realm(LOG_REALM_PLUS_LEVEL(realm, LOG_ERR),
+ ENOMEM, file, line, func, "Out of memory.");
+}
+
+/* Turns a sequence of (format string, arguments...) groups, terminated by a NULL format string, into
+ * iovec entries. Entries are stored starting at index *n, and *n is advanced accordingly. Every
+ * formatted string is allocated by vasprintf(), so the entries have to be freed by the caller. If
+ * newline_separator is set, each string is followed by a one byte entry containing '\n'. Processing
+ * stops early once fewer than two free entries are left. errno is set to ERRNO_VALUE(error) before each
+ * group is formatted. Returns 0 on success, -EINVAL if vasprintf() fails. */
+int log_format_iovec(
+ struct iovec *iovec,
+ size_t iovec_len,
+ size_t *n,
+ bool newline_separator,
+ int error,
+ const char *format,
+ va_list ap) {
+
+ static const char nl = '\n';
+
+ while (format && *n + 1 < iovec_len) {
+ va_list aq;
+ char *m;
+ int r;
+
+ /* We need to copy the va_list structure,
+ * since vasprintf() leaves it afterwards at
+ * an undefined location */
+
+ errno = ERRNO_VALUE(error);
+
+ va_copy(aq, ap);
+ r = vasprintf(&m, format, aq);
+ va_end(aq);
+ if (r < 0)
+ return -EINVAL;
+
+ /* Now, jump enough ahead, so that we point to
+ * the next format string */
+ VA_FORMAT_ADVANCE(format, ap);
+
+ iovec[(*n)++] = IOVEC_MAKE_STRING(m);
+
+ if (newline_separator) {
+ iovec[*n] = IOVEC_MAKE((char *)&nl, 1);
+ (*n)++;
+ }
+
+ /* Fetch the format string of the next group; NULL ends the sequence. */
+ format = va_arg(ap, char *);
+ }
+ return 0;
+}
+
+/* Structured logging. The variadic arguments are (format string, arguments...) groups, one per field,
+ * terminated by a NULL format string. If the log target includes the journal and the journal fd is
+ * open, all fields are sent to the journal. Otherwise, or if formatting the fields failed, the field
+ * starting with "MESSAGE=" is looked up and its text is dispatched like a regular log message; if there
+ * is no such field nothing is logged. Returns -ERRNO_VALUE(error). */
+int log_struct_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) {
+
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+ char buf[LINE_MAX];
+ bool found = false;
+ PROTECT_ERRNO;
+ va_list ap;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]) ||
+ log_target == LOG_TARGET_NULL)
+ return -ERRNO_VALUE(error);
+
+ if ((level & LOG_FACMASK) == 0)
+ level |= log_facility;
+
+ if (IN_SET(log_target,
+ LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_JOURNAL)) {
+
+ if (open_when_needed)
+ log_open_journal();
+
+ if (journal_fd >= 0) {
+ char header[LINE_MAX];
+ struct iovec iovec[17] = {};
+ size_t n = 0, i;
+ int r;
+ struct msghdr mh = {
+ .msg_iov = iovec,
+ };
+ bool fallback = false;
+
+ /* If the journal is available do structured logging.
+ * Do not report the errno if it is synthetic. */
+ log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
+ iovec[n++] = IOVEC_MAKE_STRING(header);
+
+ va_start(ap, format);
+ r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, true, error, format, ap);
+ if (r < 0)
+ fallback = true;
+ else {
+ mh.msg_iovlen = n;
+ (void) sendmsg(journal_fd, &mh, MSG_NOSIGNAL);
+ }
+
+ va_end(ap);
+ /* Entry 0 is the header; after that, odd entries are the strings allocated by
+ * log_format_iovec() and even entries are the newline separators. */
+ for (i = 1; i < n; i += 2)
+ free(iovec[i].iov_base);
+
+ if (!fallback) {
+ if (open_when_needed)
+ log_close();
+
+ return -ERRNO_VALUE(error);
+ }
+ }
+ }
+
+ /* Fallback if journal logging is not available or didn't work. */
+
+ va_start(ap, format);
+ while (format) {
+ va_list aq;
+
+ errno = ERRNO_VALUE(error);
+
+ va_copy(aq, ap);
+ (void) vsnprintf(buf, sizeof buf, format, aq);
+ va_end(aq);
+
+ if (startswith(buf, "MESSAGE=")) {
+ found = true;
+ break;
+ }
+
+ /* Not the message field: skip this group's arguments and move on to the next format string. */
+ VA_FORMAT_ADVANCE(format, ap);
+
+ format = va_arg(ap, char *);
+ }
+ va_end(ap);
+
+ if (!found) {
+ if (open_when_needed)
+ log_close();
+
+ return -ERRNO_VALUE(error);
+ }
+
+ /* buf + 8 skips the "MESSAGE=" prefix. */
+ return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buf + 8);
+}
+
+/* Structured logging with the fields supplied as ready-made iovec entries. If the log target includes
+ * the journal and the journal fd is open, the header plus all entries (each followed by a newline) are
+ * sent to the journal. If that is not possible or sendmsg() fails, the entry starting with "MESSAGE="
+ * is looked up and its text is dispatched like a regular log message; if there is no such entry nothing
+ * is logged. Returns -ERRNO_VALUE(error). */
+int log_struct_iovec_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const struct iovec input_iovec[],
+ size_t n_input_iovec) {
+
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+ PROTECT_ERRNO;
+ size_t i;
+ char *m;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]) ||
+ log_target == LOG_TARGET_NULL)
+ return -ERRNO_VALUE(error);
+
+ if ((level & LOG_FACMASK) == 0)
+ level |= log_facility;
+
+ if (IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_JOURNAL) &&
+ journal_fd >= 0) {
+
+ /* One entry for the header, then two per input entry: the entry itself and a newline. */
+ struct iovec iovec[1 + n_input_iovec*2];
+ char header[LINE_MAX];
+ struct msghdr mh = {
+ .msg_iov = iovec,
+ .msg_iovlen = 1 + n_input_iovec*2,
+ };
+
+ log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
+ iovec[0] = IOVEC_MAKE_STRING(header);
+
+ for (i = 0; i < n_input_iovec; i++) {
+ iovec[1+i*2] = input_iovec[i];
+ iovec[1+i*2+1] = IOVEC_MAKE_STRING("\n");
+ }
+
+ if (sendmsg(journal_fd, &mh, MSG_NOSIGNAL) >= 0)
+ return -ERRNO_VALUE(error);
+ }
+
+ /* Fallback: locate the message field among the input entries. */
+ for (i = 0; i < n_input_iovec; i++)
+ if (memory_startswith(input_iovec[i].iov_base, input_iovec[i].iov_len, "MESSAGE="))
+ break;
+
+ if (_unlikely_(i >= n_input_iovec)) /* Couldn't find MESSAGE=? */
+ return -ERRNO_VALUE(error);
+
+ /* Make a NUL terminated, writable copy of the text behind the "MESSAGE=" prefix. */
+ m = strndupa(input_iovec[i].iov_base + STRLEN("MESSAGE="),
+ input_iovec[i].iov_len - STRLEN("MESSAGE="));
+
+ return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, m);
+}
+
+/* Parses a log target name and makes it the current target. Returns 0 on success, -EINVAL if the name
+ * is not recognized (the target is left untouched then). */
+int log_set_target_from_string(const char *e) {
+        LogTarget parsed = log_target_from_string(e);
+
+        if (parsed >= 0) {
+                log_set_target(parsed);
+                return 0;
+        }
+
+        return -EINVAL;
+}
+
+/* Parses a log level name and makes it the maximum level of the given realm. Returns 0 on success,
+ * -EINVAL if the name is not recognized (the level is left untouched then). */
+int log_set_max_level_from_string_realm(LogRealm realm, const char *e) {
+        int parsed = log_level_from_string(e);
+
+        if (parsed >= 0) {
+                log_set_max_level_realm(realm, parsed);
+                return 0;
+        }
+
+        return -EINVAL;
+}
+
+/* Callback for proc_cmdline_parse(): applies a single kernel command line option to the logging
+ * configuration. Values that cannot be parsed are warned about and ignored. Always returns 0. The data
+ * parameter is not used. */
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+
+        /*
+         * The systemd.log_xyz= settings are parsed by all tools, and
+         * so is "debug".
+         *
+         * However, "quiet" is only parsed by PID 1, and only turns off
+         * status output to /dev/console, but does not alter the log
+         * level.
+         */
+
+        if (streq(key, "debug") && !value) {
+                log_set_max_level(LOG_DEBUG);
+                return 0;
+        }
+
+        if (proc_cmdline_key_streq(key, "systemd.log_target")) {
+
+                if (!proc_cmdline_value_missing(key, value) &&
+                    log_set_target_from_string(value) < 0)
+                        log_warning("Failed to parse log target '%s'. Ignoring.", value);
+
+                return 0;
+        }
+
+        if (proc_cmdline_key_streq(key, "systemd.log_level")) {
+
+                if (!proc_cmdline_value_missing(key, value) &&
+                    log_set_max_level_from_string(value) < 0)
+                        log_warning("Failed to parse log level '%s'. Ignoring.", value);
+
+                return 0;
+        }
+
+        /* The two boolean switches may be given without a value, which means "on". */
+        if (proc_cmdline_key_streq(key, "systemd.log_color")) {
+
+                if (log_show_color_from_string(value ?: "1") < 0)
+                        log_warning("Failed to parse log color setting '%s'. Ignoring.", value);
+
+                return 0;
+        }
+
+        if (proc_cmdline_key_streq(key, "systemd.log_location")) {
+
+                if (log_show_location_from_string(value ?: "1") < 0)
+                        log_warning("Failed to parse log location setting '%s'. Ignoring.", value);
+
+                return 0;
+        }
+
+        return 0;
+}
+
+/* Applies logging settings from the kernel command line (see the condition below) and then from the
+ * environment variables SYSTEMD_LOG_TARGET, SYSTEMD_LOG_LEVEL, SYSTEMD_LOG_COLOR and
+ * SYSTEMD_LOG_LOCATION. Values that cannot be parsed are warned about and ignored. The level taken
+ * from the environment is applied to the given realm. */
+void log_parse_environment_realm(LogRealm realm) {
+ /* Do not call from library code. */
+
+ const char *e;
+
+ if (get_ctty_devnr(0, NULL) < 0)
+ /* Only try to read the command line in daemons. We assume that anything that has a controlling tty is
+ user stuff. */
+ (void) proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+
+ e = getenv("SYSTEMD_LOG_TARGET");
+ if (e && log_set_target_from_string(e) < 0)
+ log_warning("Failed to parse log target '%s'. Ignoring.", e);
+
+ e = getenv("SYSTEMD_LOG_LEVEL");
+ if (e && log_set_max_level_from_string_realm(realm, e) < 0)
+ log_warning("Failed to parse log level '%s'. Ignoring.", e);
+
+ e = getenv("SYSTEMD_LOG_COLOR");
+ if (e && log_show_color_from_string(e) < 0)
+ log_warning("Failed to parse bool '%s'. Ignoring.", e);
+
+ e = getenv("SYSTEMD_LOG_LOCATION");
+ if (e && log_show_location_from_string(e) < 0)
+ log_warning("Failed to parse bool '%s'. Ignoring.", e);
+}
+
+/* Returns the currently configured log target. */
+LogTarget log_get_target(void) {
+ return log_target;
+}
+
+/* Returns the maximum log level of the given realm. The realm is used as array index without a range
+ * check. */
+int log_get_max_level_realm(LogRealm realm) {
+ return log_max_level[realm];
+}
+
+/* Turns color highlighting of console output on or off. */
+void log_show_color(bool b) {
+ show_color = b;
+}
+
+/* Returns whether color highlighting of console output is turned on. */
+bool log_get_show_color(void) {
+ return show_color;
+}
+
+/* Turns the "(file:line) " location prefix of console output on or off. */
+void log_show_location(bool b) {
+ show_location = b;
+}
+
+/* Returns whether the location prefix of console output is turned on. */
+bool log_get_show_location(void) {
+ return show_location;
+}
+
+/* Parses a boolean string and applies it as the color setting. Returns 0 on success, or the negative
+ * value returned by parse_boolean() if the string cannot be parsed. */
+int log_show_color_from_string(const char *e) {
+        int parsed = parse_boolean(e);
+
+        if (parsed >= 0) {
+                log_show_color(parsed);
+                return 0;
+        }
+
+        return parsed;
+}
+
+/* Parses a boolean string and applies it as the location setting. Returns 0 on success, or the negative
+ * value returned by parse_boolean() if the string cannot be parsed. */
+int log_show_location_from_string(const char *e) {
+        int parsed = parse_boolean(e);
+
+        if (parsed >= 0) {
+                log_show_location(parsed);
+                return 0;
+        }
+
+        return parsed;
+}
+
+/* Returns true if log output goes to the console: either because one of the console targets is
+ * selected, or because none of the syslog, kmsg and journal fds is open. */
+bool log_on_console(void) {
+        switch (log_target) {
+
+        case LOG_TARGET_CONSOLE:
+        case LOG_TARGET_CONSOLE_PREFIXED:
+                return true;
+
+        default:
+                return syslog_fd < 0 && kmsg_fd < 0 && journal_fd < 0;
+        }
+}
+
+/* Names of the log targets, indexed by LogTarget value. */
+static const char *const log_target_table[_LOG_TARGET_MAX] = {
+ [LOG_TARGET_CONSOLE] = "console",
+ [LOG_TARGET_CONSOLE_PREFIXED] = "console-prefixed",
+ [LOG_TARGET_KMSG] = "kmsg",
+ [LOG_TARGET_JOURNAL] = "journal",
+ [LOG_TARGET_JOURNAL_OR_KMSG] = "journal-or-kmsg",
+ [LOG_TARGET_SYSLOG] = "syslog",
+ [LOG_TARGET_SYSLOG_OR_KMSG] = "syslog-or-kmsg",
+ [LOG_TARGET_AUTO] = "auto",
+ [LOG_TARGET_NULL] = "null",
+};
+
+/* NOTE(review): presumably expands to the log_target_to_string()/log_target_from_string() pair that is
+ * built on the table above -- confirm against string-table.h. */
+DEFINE_STRING_TABLE_LOOKUP(log_target, LogTarget);
+
+/* Logs at the given level that a signal was received. If the signal info carries a valid sender PID,
+ * the PID and the sender's process name (as far as it can be determined) are included in the message. */
+void log_received_signal(int level, const struct signalfd_siginfo *si) {
+        _cleanup_free_ char *comm = NULL;
+
+        assert(si);
+
+        if (!pid_is_valid(si->ssi_pid)) {
+                log_full(level,
+                         "Received SIG%s.",
+                         signal_to_string(si->ssi_signo));
+                return;
+        }
+
+        /* Failure is fine here, strna() covers the case where no name could be acquired. */
+        (void) get_process_comm(si->ssi_pid, &comm);
+
+        log_full(level,
+                 "Received SIG%s from PID %"PRIu32" (%s).",
+                 signal_to_string(si->ssi_signo),
+                 si->ssi_pid, strna(comm));
+}
+
+/* Logs a problem found in a configuration file as a structured message. The message text is generated
+ * from format and prefixed with "<config_file>:<config_line>: ". CONFIG_FILE= and CONFIG_LINE= fields
+ * are attached, and if unit is set also a UNIT= (when running as PID 1) or USER_UNIT= (otherwise)
+ * field. The level is checked against the maximum level of LOG_REALM_SYSTEMD. Returns
+ * -ERRNO_VALUE(error). */
+int log_syntax_internal(
+                const char *unit,
+                int level,
+                const char *config_file,
+                unsigned config_line,
+                int error,
+                const char *file,
+                int line,
+                const char *func,
+                const char *format, ...) {
+
+        PROTECT_ERRNO;
+        char buffer[LINE_MAX];
+        va_list ap;
+        const char *unit_fmt = NULL;
+
+        if (_likely_(LOG_PRI(level) > log_max_level[LOG_REALM_SYSTEMD]) ||
+            log_target == LOG_TARGET_NULL)
+                return -ERRNO_VALUE(error);
+
+        /* Make sure that %m maps to the specified error (or "Success"). The error may be passed in
+         * negative or with the synthetic flag set, hence reduce it to the plain errno value first, the
+         * same way the other logging functions in this file do. */
+        errno = ERRNO_VALUE(error);
+
+        va_start(ap, format);
+        (void) vsnprintf(buffer, sizeof buffer, format, ap);
+        va_end(ap);
+
+        if (unit)
+                unit_fmt = getpid_cached() == 1 ? "UNIT=%s" : "USER_UNIT=%s";
+
+        /* If unit is NULL then unit_fmt is NULL too, and hence terminates the field list early. */
+        return log_struct_internal(
+                        LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+                        error,
+                        file, line, func,
+                        "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+                        "CONFIG_FILE=%s", config_file,
+                        "CONFIG_LINE=%u", config_line,
+                        LOG_MESSAGE("%s:%u: %s", config_file, config_line, buffer),
+                        unit_fmt, unit,
+                        NULL);
+}
+
+/* Logs, via log_syntax_internal(), that a configuration value is not valid UTF-8. The value is shown
+ * in the escaped form produced by utf8_escape_invalid(); if rvalue is NULL or escaping yields NULL,
+ * strna() supplies the text instead. Always returns -EINVAL. */
+int log_syntax_invalid_utf8_internal(
+ const char *unit,
+ int level,
+ const char *config_file,
+ unsigned config_line,
+ const char *file,
+ int line,
+ const char *func,
+ const char *rvalue) {
+
+ _cleanup_free_ char *p = NULL;
+
+ if (rvalue)
+ p = utf8_escape_invalid(rvalue);
+
+ log_syntax_internal(unit, level, config_file, config_line, 0, file, line, func,
+ "String is not UTF-8 clean, ignoring assignment: %s", strna(p));
+
+ return -EINVAL;
+}
+
+/* Controls whether the syslog log targets are replaced by their journal counterparts. When turned on,
+ * a syslog target that is currently selected is converted right away. */
+void log_set_upgrade_syslog_to_journal(bool b) {
+        upgrade_syslog_to_journal = b;
+
+        if (!b)
+                return;
+
+        /* Make the change effective immediately */
+        switch (log_target) {
+
+        case LOG_TARGET_SYSLOG:
+                log_target = LOG_TARGET_JOURNAL;
+                break;
+
+        case LOG_TARGET_SYSLOG_OR_KMSG:
+                log_target = LOG_TARGET_JOURNAL_OR_KMSG;
+                break;
+
+        default:
+                break;
+        }
+}
+
+/* Controls whether log_open_console() opens /dev/console (true) or simply uses stderr (false). */
+void log_set_always_reopen_console(bool b) {
+ always_reopen_console = b;
+}
+
+/* Controls whether the log sinks are opened right before and closed right after each message. */
+void log_set_open_when_needed(bool b) {
+ open_when_needed = b;
+}
+
+/* Controls whether log_open() has to stay away from the journal and syslog sockets. */
+void log_set_prohibit_ipc(bool b) {
+ prohibit_ipc = b;
+}
+
+/* Returns the log level to use for log_emergency() logging. LOG_EMERG is only used when running as
+ * PID 1, as only then the whole system is obviously affected; everybody else gets LOG_ERR. */
+int log_emergency_level(void) {
+        if (getpid_cached() == 1)
+                return LOG_EMERG;
+
+        return LOG_ERR;
+}
+
+/* Duplicates the fd used for console logging if it is < 3, and uses the copy (which is >= 3 and has
+ * the close-on-exec flag set) from then on. This is useful whenever logging shall continue through the
+ * original fd while stderr is being rearranged. Returns 0 on success or if nothing had to be done,
+ * -errno if duplicating failed. */
+int log_dup_console(void) {
+
+        if (console_fd < 3) {
+                int duplicate;
+
+                duplicate = fcntl(console_fd, F_DUPFD_CLOEXEC, 3);
+                if (duplicate < 0)
+                        return -errno;
+
+                console_fd = duplicate;
+        }
+
+        return 0;
+}
+
+void log_setup_service(void) {
+ /* Sets up logging the way it is most appropriate for running a program as a service. Note that using this
+ * doesn't make the binary unsuitable for invocation on the command line, as log output will still go to the
+ * terminal if invoked interactively. */
+
+ log_set_target(LOG_TARGET_AUTO);
+ log_parse_environment();
+ log_open();
+}
diff --git a/src/basic/log.h b/src/basic/log.h
new file mode 100644
index 0000000..17438d7
--- /dev/null
+++ b/src/basic/log.h
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <syslog.h>
+
+#include "macro.h"
+
+/* Some structures we reference but don't want to pull in headers for */
+struct iovec;
+struct signalfd_siginfo;
+
+typedef enum LogRealm {
+ LOG_REALM_SYSTEMD,
+ LOG_REALM_UDEV,
+ _LOG_REALM_MAX,
+} LogRealm;
+
+#ifndef LOG_REALM
+# define LOG_REALM LOG_REALM_SYSTEMD
+#endif
+
+typedef enum LogTarget{
+ LOG_TARGET_CONSOLE,
+ LOG_TARGET_CONSOLE_PREFIXED,
+ LOG_TARGET_KMSG,
+ LOG_TARGET_JOURNAL,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_SYSLOG,
+ LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_AUTO, /* console if stderr is tty, JOURNAL_OR_KMSG otherwise */
+ LOG_TARGET_NULL,
+ _LOG_TARGET_MAX,
+ _LOG_TARGET_INVALID = -1
+} LogTarget;
+
+/* Note to readers: << and >> have higher precedence (i.e. bind more tightly) than & and | */
+#define LOG_REALM_PLUS_LEVEL(realm, level) ((realm) << 10 | (level))
+#define LOG_REALM_REMOVE_LEVEL(realm_level) ((realm_level) >> 10)
+#define SYNTHETIC_ERRNO(num) (1 << 30 | (num))
+#define IS_SYNTHETIC_ERRNO(val) ((val) >> 30 & 1)
+#define ERRNO_VALUE(val) (abs(val) & 255)
+
+void log_set_target(LogTarget target);
+void log_set_max_level_realm(LogRealm realm, int level);
+#define log_set_max_level(level) \
+ log_set_max_level_realm(LOG_REALM, (level))
+
+void log_set_facility(int facility);
+
+int log_set_target_from_string(const char *e);
+int log_set_max_level_from_string_realm(LogRealm realm, const char *e);
+#define log_set_max_level_from_string(e) \
+ log_set_max_level_from_string_realm(LOG_REALM, (e))
+
+void log_show_color(bool b);
+bool log_get_show_color(void) _pure_;
+void log_show_location(bool b);
+bool log_get_show_location(void) _pure_;
+
+int log_show_color_from_string(const char *e);
+int log_show_location_from_string(const char *e);
+
+LogTarget log_get_target(void) _pure_;
+int log_get_max_level_realm(LogRealm realm) _pure_;
+#define log_get_max_level() \
+ log_get_max_level_realm(LOG_REALM)
+
+/* Functions below that open and close logs or configure logging based on the
+ * environment should not be called from library code — this is always a job
+ * for the application itself.
+ */
+
+int log_open(void);
+void log_close(void);
+void log_forget_fds(void);
+
+void log_parse_environment_realm(LogRealm realm);
+#define log_parse_environment() \
+ log_parse_environment_realm(LOG_REALM)
+
+int log_dispatch_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra,
+ const char *extra_field,
+ char *buffer);
+
+int log_internal_realm(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) _printf_(6,7);
+#define log_internal(level, ...) \
+ log_internal_realm(LOG_REALM_PLUS_LEVEL(LOG_REALM, (level)), __VA_ARGS__)
+
+int log_internalv_realm(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format,
+ va_list ap) _printf_(6,0);
+#define log_internalv(level, ...) \
+ log_internalv_realm(LOG_REALM_PLUS_LEVEL(LOG_REALM, (level)), __VA_ARGS__)
+
+/* Realm is fixed to LOG_REALM_SYSTEMD for those */
+int log_object_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ const char *format, ...) _printf_(10,11);
+
+int log_struct_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) _printf_(6,0) _sentinel_;
+
+int log_oom_internal(
+ LogRealm realm,
+ const char *file,
+ int line,
+ const char *func);
+
+int log_format_iovec(
+ struct iovec *iovec,
+ size_t iovec_len,
+ size_t *n,
+ bool newline_separator,
+ int error,
+ const char *format,
+ va_list ap) _printf_(6, 0);
+
+int log_struct_iovec_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const struct iovec *input_iovec,
+ size_t n_input_iovec);
+
+/* This modifies the buffer passed! */
+int log_dump_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ char *buffer);
+
+/* Logging for various assertions */
+_noreturn_ void log_assert_failed_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func);
+#define log_assert_failed(text, ...) \
+ log_assert_failed_realm(LOG_REALM, (text), __VA_ARGS__)
+
+_noreturn_ void log_assert_failed_unreachable_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func);
+#define log_assert_failed_unreachable(text, ...) \
+ log_assert_failed_unreachable_realm(LOG_REALM, (text), __VA_ARGS__)
+
+void log_assert_failed_return_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func);
+#define log_assert_failed_return(text, ...) \
+ log_assert_failed_return_realm(LOG_REALM, (text), __VA_ARGS__)
+
+#define log_dispatch(level, error, buffer) \
+ log_dispatch_internal(level, error, __FILE__, __LINE__, __func__, NULL, NULL, NULL, NULL, buffer)
+
+/* Logging with level */
+/* Logs a message at the given level for the given realm, if that realm's maximum log level permits it. Each of
+ * realm, level and error is evaluated exactly once. Evaluates to the result of log_internal_realm() when the
+ * message is logged, and to -ERRNO_VALUE(error) otherwise, so the error value (with any SYNTHETIC_ERRNO() marker
+ * bit masked off) can be returned by the caller either way. */
+#define log_full_errno_realm(realm, level, error, ...) \
+ ({ \
+ int _level = (level), _e = (error), _realm = (realm); \
+ (log_get_max_level_realm(_realm) >= LOG_PRI(_level)) \
+ ? log_internal_realm(LOG_REALM_PLUS_LEVEL(_realm, _level), _e, \
+ __FILE__, __LINE__, __func__, __VA_ARGS__) \
+ : -ERRNO_VALUE(_e); \
+ })
+
+#define log_full_errno(level, error, ...) \
+ log_full_errno_realm(LOG_REALM, (level), (error), __VA_ARGS__)
+
+#define log_full(level, ...) log_full_errno((level), 0, __VA_ARGS__)
+
+int log_emergency_level(void);
+
+/* Normal logging */
+#define log_debug(...) log_full(LOG_DEBUG, __VA_ARGS__)
+#define log_info(...) log_full(LOG_INFO, __VA_ARGS__)
+#define log_notice(...) log_full(LOG_NOTICE, __VA_ARGS__)
+#define log_warning(...) log_full(LOG_WARNING, __VA_ARGS__)
+#define log_error(...) log_full(LOG_ERR, __VA_ARGS__)
+#define log_emergency(...) log_full(log_emergency_level(), __VA_ARGS__)
+
+/* Logging triggered by an errno-like error */
+#define log_debug_errno(error, ...) log_full_errno(LOG_DEBUG, error, __VA_ARGS__)
+#define log_info_errno(error, ...) log_full_errno(LOG_INFO, error, __VA_ARGS__)
+#define log_notice_errno(error, ...) log_full_errno(LOG_NOTICE, error, __VA_ARGS__)
+#define log_warning_errno(error, ...) log_full_errno(LOG_WARNING, error, __VA_ARGS__)
+#define log_error_errno(error, ...) log_full_errno(LOG_ERR, error, __VA_ARGS__)
+#define log_emergency_errno(error, ...) log_full_errno(log_emergency_level(), error, __VA_ARGS__)
+
+#ifdef LOG_TRACE
+# define log_trace(...) log_debug(__VA_ARGS__)
+#else
+# define log_trace(...) do {} while (0)
+#endif
+
+/* Structured logging */
+#define log_struct_errno(level, error, ...) \
+ log_struct_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
+ error, __FILE__, __LINE__, __func__, __VA_ARGS__, NULL)
+#define log_struct(level, ...) log_struct_errno(level, 0, __VA_ARGS__)
+
+#define log_struct_iovec_errno(level, error, iovec, n_iovec) \
+ log_struct_iovec_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
+ error, __FILE__, __LINE__, __func__, iovec, n_iovec)
+#define log_struct_iovec(level, iovec, n_iovec) log_struct_iovec_errno(level, 0, iovec, n_iovec)
+
+/* This modifies the buffer passed! */
+#define log_dump(level, buffer) \
+ log_dump_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
+ 0, __FILE__, __LINE__, __func__, buffer)
+
+#define log_oom() log_oom_internal(LOG_REALM, __FILE__, __LINE__, __func__)
+
+bool log_on_console(void) _pure_;
+
+const char *log_target_to_string(LogTarget target) _const_;
+LogTarget log_target_from_string(const char *s) _pure_;
+
+/* Helper to prepare various fields for structured logging */
+#define LOG_MESSAGE(fmt, ...) "MESSAGE=" fmt, ##__VA_ARGS__
+
+void log_received_signal(int level, const struct signalfd_siginfo *si);
+
+/* If turned on, any requests for a log target involving "syslog" will be implicitly upgraded to the equivalent journal target */
+void log_set_upgrade_syslog_to_journal(bool b);
+
+/* If turned on, and log_open() is called, we'll not use STDERR_FILENO for logging ever, but rather open /dev/console */
+void log_set_always_reopen_console(bool b);
+
+/* If turned on, we'll open the log stream implicitly if needed on each individual log call. This is normally not
+ * desired as we want to reuse our logging streams, but it can be useful in some cases. */
+void log_set_open_when_needed(bool b);
+
+/* If turned on, then we'll never use IPC-based logging, i.e. never log to syslog or the journal. We'll only log to
+ * stderr, the console or kmsg */
+void log_set_prohibit_ipc(bool b);
+
+int log_dup_console(void);
+
+int log_syntax_internal(
+ const char *unit,
+ int level,
+ const char *config_file,
+ unsigned config_line,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) _printf_(9, 10);
+
+int log_syntax_invalid_utf8_internal(
+ const char *unit,
+ int level,
+ const char *config_file,
+ unsigned config_line,
+ const char *file,
+ int line,
+ const char *func,
+ const char *rvalue);
+
+/* Logs a configuration syntax problem for the given unit and config file location, if the maximum log level
+ * permits it. level and error are evaluated exactly once. Evaluates to the result of log_syntax_internal() when
+ * the message is logged, and to the negated errno value of 'error' otherwise. The suppressed path uses
+ * ERRNO_VALUE(), the same as log_full_errno_realm(), so that an error wrapped in SYNTHETIC_ERRNO() does not leak
+ * the synthetic marker bit into the returned value. */
+#define log_syntax(unit, level, config_file, config_line, error, ...) \
+ ({ \
+ int _level = (level), _e = (error); \
+ (log_get_max_level() >= LOG_PRI(_level)) \
+ ? log_syntax_internal(unit, _level, config_file, config_line, _e, __FILE__, __LINE__, __func__, __VA_ARGS__) \
+ : -ERRNO_VALUE(_e); \
+ })
+
+/* Reports a configuration value that is not valid UTF-8, if the maximum log level permits it. level is evaluated
+ * exactly once. Evaluates to the result of log_syntax_invalid_utf8_internal() when the message is logged, and to
+ * -EINVAL otherwise. */
+#define log_syntax_invalid_utf8(unit, level, config_file, config_line, rvalue) \
+ ({ \
+ int _level = (level); \
+ (log_get_max_level() >= LOG_PRI(_level)) \
+ ? log_syntax_invalid_utf8_internal(unit, _level, config_file, config_line, __FILE__, __LINE__, __func__, rvalue) \
+ : -EINVAL; \
+ })
+
+#define DEBUG_LOGGING _unlikely_(log_get_max_level() >= LOG_DEBUG)
+
+void log_setup_service(void);
diff --git a/src/basic/login-util.c b/src/basic/login-util.c
new file mode 100644
index 0000000..085ccd0
--- /dev/null
+++ b/src/basic/login-util.c
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+
+#include "login-util.h"
+#include "string-util.h"
+
+/* Checks a session identifier: it is valid only if it is non-empty and consists solely of characters from the
+ * LETTERS and DIGITS sets. */
+bool session_id_valid(const char *id) {
+        size_t n_ok;
+
+        if (isempty(id))
+                return false;
+
+        /* Length of the leading run of permitted characters; the ID is valid iff that run reaches the NUL */
+        n_ok = strspn(id, LETTERS DIGITS);
+
+        return id[n_ok] == '\0';
+}
diff --git a/src/basic/login-util.h b/src/basic/login-util.h
new file mode 100644
index 0000000..e1e62e1
--- /dev/null
+++ b/src/basic/login-util.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <unistd.h>
+
+bool session_id_valid(const char *id);
+
+/* Returns true if /run/systemd/seats/ exists, as determined by access() with F_OK. */
+static inline bool logind_running(void) {
+        int r;
+
+        r = access("/run/systemd/seats/", F_OK);
+
+        return r >= 0;
+}
diff --git a/src/basic/macro.h b/src/basic/macro.h
new file mode 100644
index 0000000..1971e91
--- /dev/null
+++ b/src/basic/macro.h
@@ -0,0 +1,549 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#define _printf_(a, b) __attribute__((__format__(printf, a, b)))
+#ifdef __clang__
+# define _alloc_(...)
+#else
+# define _alloc_(...) __attribute__((__alloc_size__(__VA_ARGS__)))
+#endif
+#define _sentinel_ __attribute__((__sentinel__))
+#define _section_(x) __attribute__((__section__(x)))
+#define _used_ __attribute__((__used__))
+#define _unused_ __attribute__((__unused__))
+#define _destructor_ __attribute__((__destructor__))
+#define _pure_ __attribute__((__pure__))
+#define _const_ __attribute__((__const__))
+#define _deprecated_ __attribute__((__deprecated__))
+#define _packed_ __attribute__((__packed__))
+#define _malloc_ __attribute__((__malloc__))
+#define _weak_ __attribute__((__weak__))
+#define _likely_(x) (__builtin_expect(!!(x), 1))
+#define _unlikely_(x) (__builtin_expect(!!(x), 0))
+#define _public_ __attribute__((__visibility__("default")))
+#define _hidden_ __attribute__((__visibility__("hidden")))
+#define _weakref_(x) __attribute__((__weakref__(#x)))
+#define _align_(x) __attribute__((__aligned__(x)))
+#define _alignas_(x) __attribute__((__aligned__(__alignof(x))))
+#define _alignptr_ __attribute__((__aligned__(sizeof(void*))))
+#define _cleanup_(x) __attribute__((__cleanup__(x)))
+#if __GNUC__ >= 7
+#define _fallthrough_ __attribute__((__fallthrough__))
+#else
+#define _fallthrough_
+#endif
+/* Define C11 noreturn without <stdnoreturn.h> and even on older gcc
+ * compiler versions */
+#ifndef _noreturn_
+#if __STDC_VERSION__ >= 201112L
+#define _noreturn_ _Noreturn
+#else
+#define _noreturn_ __attribute__((__noreturn__))
+#endif
+#endif
+
+#if !defined(HAS_FEATURE_MEMORY_SANITIZER)
+# if defined(__has_feature)
+# if __has_feature(memory_sanitizer)
+# define HAS_FEATURE_MEMORY_SANITIZER 1
+# endif
+# endif
+# if !defined(HAS_FEATURE_MEMORY_SANITIZER)
+# define HAS_FEATURE_MEMORY_SANITIZER 0
+# endif
+#endif
+
+#if !defined(HAS_FEATURE_ADDRESS_SANITIZER)
+# ifdef __SANITIZE_ADDRESS__
+# define HAS_FEATURE_ADDRESS_SANITIZER 1
+# elif defined(__has_feature)
+# if __has_feature(address_sanitizer)
+# define HAS_FEATURE_ADDRESS_SANITIZER 1
+# endif
+# endif
+# if !defined(HAS_FEATURE_ADDRESS_SANITIZER)
+# define HAS_FEATURE_ADDRESS_SANITIZER 0
+# endif
+#endif
+
+/* Note: on GCC "no_sanitize_address" is a function attribute only, on llvm it may also be applied to global
+ * variables. We define a specific macro which knows this. Note that on GCC we don't need this decorator so much, since
+ * our primary usecase for this attribute is registration structures placed in named ELF sections which shall not be
+ * padded, but GCC doesn't pad those anyway if AddressSanitizer is enabled. */
+#if HAS_FEATURE_ADDRESS_SANITIZER && defined(__clang__)
+#define _variable_no_sanitize_address_ __attribute__((__no_sanitize_address__))
+#else
+#define _variable_no_sanitize_address_
+#endif
+
+/* Temporarily disable some warnings */
+#define DISABLE_WARNING_FORMAT_NONLITERAL \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wformat-nonliteral\"")
+
+#define DISABLE_WARNING_MISSING_PROTOTYPES \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wmissing-prototypes\"")
+
+#define DISABLE_WARNING_NONNULL \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wnonnull\"")
+
+#define DISABLE_WARNING_SHADOW \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wshadow\"")
+
+#define DISABLE_WARNING_INCOMPATIBLE_POINTER_TYPES \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wincompatible-pointer-types\"")
+
+#define REENABLE_WARNING \
+ _Pragma("GCC diagnostic pop")
+
+/* automake test harness */
+#define EXIT_TEST_SKIP 77
+
+#define XSTRINGIFY(x) #x
+#define STRINGIFY(x) XSTRINGIFY(x)
+
+#define XCONCATENATE(x, y) x ## y
+#define CONCATENATE(x, y) XCONCATENATE(x, y)
+
+#define UNIQ_T(x, uniq) CONCATENATE(__unique_prefix_, CONCATENATE(x, uniq))
+#define UNIQ __COUNTER__
+
+/* builtins */
+/* BUILTIN_FFS_U32(x) maps to the compiler's find-first-set builtin for whichever of int/long is four bytes wide.
+ * The expansion deliberately carries no trailing semicolon, so that the macro can be used inside larger
+ * expressions: with a semicolon, "BUILTIN_FFS_U32(x) - 1" would silently expand to two statements. */
+#if __SIZEOF_INT__ == 4
+#define BUILTIN_FFS_U32(x) __builtin_ffs(x)
+#elif __SIZEOF_LONG__ == 4
+#define BUILTIN_FFS_U32(x) __builtin_ffsl(x)
+#else
+#error "neither int nor long are four bytes long?!?"
+#endif
+
+/* Rounds up */
+
+#define ALIGN4(l) (((l) + 3) & ~3)
+#define ALIGN8(l) (((l) + 7) & ~7)
+
+#if __SIZEOF_POINTER__ == 8
+#define ALIGN(l) ALIGN8(l)
+#elif __SIZEOF_POINTER__ == 4
+#define ALIGN(l) ALIGN4(l)
+#else
+#error "Wut? Pointers are neither 4 nor 8 bytes long?"
+#endif
+
+#define ALIGN_PTR(p) ((void*) ALIGN((unsigned long) (p)))
+#define ALIGN4_PTR(p) ((void*) ALIGN4((unsigned long) (p)))
+#define ALIGN8_PTR(p) ((void*) ALIGN8((unsigned long) (p)))
+
+/* Rounds l up to the next multiple of ali. The bit-mask arithmetic only yields a correct result if ali is a
+ * power of two; no overflow check is done on l + ali - 1. */
+static inline size_t ALIGN_TO(size_t l, size_t ali) {
+ return ((l + ali - 1) & ~(ali - 1));
+}
+
+#define ALIGN_TO_PTR(p, ali) ((void*) ALIGN_TO((unsigned long) (p), (ali)))
+
+/* align to next higher power-of-2 (except for: 0 => 0, overflow => 0). A value that already is a power of two
+ * is returned unchanged. */
+static inline unsigned long ALIGN_POWER2(unsigned long u) {
+ /* For u == 1 the expression u - 1 used below is 0, and clz(0) is undefined, hence handle it explicitly */
+ if (u == 1)
+ return 1;
+
+ /* No leading zero bit in u - 1 means the result would not fit, and the left-shift overflow is undefined.
+ * This also catches u == 0, where u - 1 wraps around to ULONG_MAX. */
+ if (__builtin_clzl(u - 1UL) < 1)
+ return 0;
+
+ /* Number of significant bits of u - 1 is the exponent of the smallest power of two >= u */
+ return 1UL << (sizeof(u) * 8 - __builtin_clzl(u - 1UL));
+}
+
+#ifndef __COVERITY__
+# define VOID_0 ((void)0)
+#else
+# define VOID_0 ((void*)0)
+#endif
+
+#define ELEMENTSOF(x) \
+ (__builtin_choose_expr( \
+ !__builtin_types_compatible_p(typeof(x), typeof(&*(x))), \
+ sizeof(x)/sizeof((x)[0]), \
+ VOID_0))
+
+/*
+ * STRLEN - return the length of a string literal, minus the trailing NUL byte.
+ * Contrary to strlen(), this is a constant expression.
+ * @x: a string literal.
+ */
+#define STRLEN(x) (sizeof(""x"") - 1)
+
+/*
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ */
+#define container_of(ptr, type, member) __container_of(UNIQ, (ptr), type, member)
+#define __container_of(uniq, ptr, type, member) \
+ ({ \
+ const typeof( ((type*)0)->member ) *UNIQ_T(A, uniq) = (ptr); \
+ (type*)( (char *)UNIQ_T(A, uniq) - offsetof(type, member) ); \
+ })
+
+#undef MAX
+#define MAX(a, b) __MAX(UNIQ, (a), UNIQ, (b))
+#define __MAX(aq, a, bq, b) \
+ ({ \
+ const typeof(a) UNIQ_T(A, aq) = (a); \
+ const typeof(b) UNIQ_T(B, bq) = (b); \
+ UNIQ_T(A, aq) > UNIQ_T(B, bq) ? UNIQ_T(A, aq) : UNIQ_T(B, bq); \
+ })
+
+/* evaluates to (void) if _A or _B are not constant or of different types */
+#define CONST_MAX(_A, _B) \
+ (__builtin_choose_expr( \
+ __builtin_constant_p(_A) && \
+ __builtin_constant_p(_B) && \
+ __builtin_types_compatible_p(typeof(_A), typeof(_B)), \
+ ((_A) > (_B)) ? (_A) : (_B), \
+ VOID_0))
+
+/* takes two types and returns the size of the larger one */
+#define MAXSIZE(A, B) (sizeof(union _packed_ { typeof(A) a; typeof(B) b; }))
+
+#define MAX3(x, y, z) \
+ ({ \
+ const typeof(x) _c = MAX(x, y); \
+ MAX(_c, z); \
+ })
+
+#undef MIN
+#define MIN(a, b) __MIN(UNIQ, (a), UNIQ, (b))
+#define __MIN(aq, a, bq, b) \
+ ({ \
+ const typeof(a) UNIQ_T(A, aq) = (a); \
+ const typeof(b) UNIQ_T(B, bq) = (b); \
+ UNIQ_T(A, aq) < UNIQ_T(B, bq) ? UNIQ_T(A, aq) : UNIQ_T(B, bq); \
+ })
+
+#define MIN3(x, y, z) \
+ ({ \
+ const typeof(x) _c = MIN(x, y); \
+ MIN(_c, z); \
+ })
+
+#define LESS_BY(a, b) __LESS_BY(UNIQ, (a), UNIQ, (b))
+#define __LESS_BY(aq, a, bq, b) \
+ ({ \
+ const typeof(a) UNIQ_T(A, aq) = (a); \
+ const typeof(b) UNIQ_T(B, bq) = (b); \
+ UNIQ_T(A, aq) > UNIQ_T(B, bq) ? UNIQ_T(A, aq) - UNIQ_T(B, bq) : 0; \
+ })
+
+#define CMP(a, b) __CMP(UNIQ, (a), UNIQ, (b))
+#define __CMP(aq, a, bq, b) \
+ ({ \
+ const typeof(a) UNIQ_T(A, aq) = (a); \
+ const typeof(b) UNIQ_T(B, bq) = (b); \
+ UNIQ_T(A, aq) < UNIQ_T(B, bq) ? -1 : \
+ UNIQ_T(A, aq) > UNIQ_T(B, bq) ? 1 : 0; \
+ })
+
+#undef CLAMP
+#define CLAMP(x, low, high) __CLAMP(UNIQ, (x), UNIQ, (low), UNIQ, (high))
+#define __CLAMP(xq, x, lowq, low, highq, high) \
+ ({ \
+ const typeof(x) UNIQ_T(X, xq) = (x); \
+ const typeof(low) UNIQ_T(LOW, lowq) = (low); \
+ const typeof(high) UNIQ_T(HIGH, highq) = (high); \
+ UNIQ_T(X, xq) > UNIQ_T(HIGH, highq) ? \
+ UNIQ_T(HIGH, highq) : \
+ UNIQ_T(X, xq) < UNIQ_T(LOW, lowq) ? \
+ UNIQ_T(LOW, lowq) : \
+ UNIQ_T(X, xq); \
+ })
+
+/* [(x + y - 1) / y] suffers from an integer overflow, even though the
+ * computation should be possible in the given type. Therefore, we use
+ * [x / y + !!(x % y)]. Note that on "Real CPUs" a division returns both the
+ * quotient and the remainder, so both should be equally fast. */
+#define DIV_ROUND_UP(x, y) __DIV_ROUND_UP(UNIQ, (x), UNIQ, (y))
+#define __DIV_ROUND_UP(xq, x, yq, y) \
+ ({ \
+ const typeof(x) UNIQ_T(X, xq) = (x); \
+ const typeof(y) UNIQ_T(Y, yq) = (y); \
+ (UNIQ_T(X, xq) / UNIQ_T(Y, yq) + !!(UNIQ_T(X, xq) % UNIQ_T(Y, yq))); \
+ })
+
+#ifdef __COVERITY__
+
+/* Use special definitions of assertion macros in order to prevent
+ * false positives of ASSERT_SIDE_EFFECT on Coverity static analyzer
+ * for uses of assert_se() and assert_return().
+ *
+ * These definitions make expression go through a (trivial) function
+ * call to ensure they are not discarded. Also use ! or !! to ensure
+ * the boolean expressions are seen as such.
+ *
+ * This technique has been described and recommended in:
+ * https://community.synopsys.com/s/question/0D534000046Yuzb/suppressing-assertsideeffect-for-functions-that-allow-for-sideeffects
+ */
+
+extern void __coverity_panic__(void);
+
+static inline int __coverity_check__(int condition) {
+ return condition;
+}
+
+#define assert_message_se(expr, message) \
+ do { \
+ if (__coverity_check__(!(expr))) \
+ __coverity_panic__(); \
+ } while (false)
+
+#define assert_log(expr, message) __coverity_check__(!!(expr))
+
+#else /* ! __COVERITY__ */
+
+#define assert_message_se(expr, message) \
+ do { \
+ if (_unlikely_(!(expr))) \
+ log_assert_failed(message, __FILE__, __LINE__, __PRETTY_FUNCTION__); \
+ } while (false)
+
+#define assert_log(expr, message) ((_likely_(expr)) \
+ ? (true) \
+ : (log_assert_failed_return(message, __FILE__, __LINE__, __PRETTY_FUNCTION__), false))
+
+#endif /* __COVERITY__ */
+
+#define assert_se(expr) assert_message_se(expr, #expr)
+
+/* We override the glibc assert() here. */
+#undef assert
+#ifdef NDEBUG
+#define assert(expr) do {} while (false)
+#else
+#define assert(expr) assert_message_se(expr, #expr)
+#endif
+
+#define assert_not_reached(t) \
+ do { \
+ log_assert_failed_unreachable(t, __FILE__, __LINE__, __PRETTY_FUNCTION__); \
+ } while (false)
+
+#if defined(static_assert)
+#define assert_cc(expr) \
+ static_assert(expr, #expr);
+#else
+#define assert_cc(expr) \
+ struct CONCATENATE(_assert_struct_, __COUNTER__) { \
+ char x[(expr) ? 0 : -1]; \
+ };
+#endif
+
+#define assert_return(expr, r) \
+ do { \
+ if (!assert_log(expr, #expr)) \
+ return (r); \
+ } while (false)
+
+#define assert_return_errno(expr, r, err) \
+ do { \
+ if (!assert_log(expr, #expr)) { \
+ errno = err; \
+ return (r); \
+ } \
+ } while (false)
+
+#define return_with_errno(r, err) \
+ do { \
+ errno = abs(err); \
+ return r; \
+ } while (false)
+
+#define PTR_TO_INT(p) ((int) ((intptr_t) (p)))
+#define INT_TO_PTR(u) ((void *) ((intptr_t) (u)))
+#define PTR_TO_UINT(p) ((unsigned) ((uintptr_t) (p)))
+#define UINT_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define PTR_TO_LONG(p) ((long) ((intptr_t) (p)))
+#define LONG_TO_PTR(u) ((void *) ((intptr_t) (u)))
+#define PTR_TO_ULONG(p) ((unsigned long) ((uintptr_t) (p)))
+#define ULONG_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define PTR_TO_INT32(p) ((int32_t) ((intptr_t) (p)))
+#define INT32_TO_PTR(u) ((void *) ((intptr_t) (u)))
+#define PTR_TO_UINT32(p) ((uint32_t) ((uintptr_t) (p)))
+#define UINT32_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define PTR_TO_INT64(p) ((int64_t) ((intptr_t) (p)))
+#define INT64_TO_PTR(u) ((void *) ((intptr_t) (u)))
+#define PTR_TO_UINT64(p) ((uint64_t) ((uintptr_t) (p)))
+#define UINT64_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define PTR_TO_SIZE(p) ((size_t) ((uintptr_t) (p)))
+#define SIZE_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define CHAR_TO_STR(x) ((char[2]) { x, 0 })
+
+#define char_array_0(x) x[sizeof(x)-1] = 0;
+
+/* Returns the number of chars needed to format variables of the
+ * specified type as a decimal string. Adds in extra space for a
+ * negative '-' prefix (hence works correctly on signed
+ * types). Includes space for the trailing NUL. */
+#define DECIMAL_STR_MAX(type) \
+ (2+(sizeof(type) <= 1 ? 3 : \
+ sizeof(type) <= 2 ? 5 : \
+ sizeof(type) <= 4 ? 10 : \
+ sizeof(type) <= 8 ? 20 : sizeof(int[-2*(sizeof(type) > 8)])))
+
+#define DECIMAL_STR_WIDTH(x) \
+ ({ \
+ typeof(x) _x_ = (x); \
+ unsigned ans = 1; \
+ while ((_x_ /= 10) != 0) \
+ ans++; \
+ ans; \
+ })
+
+#define SET_FLAG(v, flag, b) \
+ (v) = (b) ? ((v) | (flag)) : ((v) & ~(flag))
+#define FLAGS_SET(v, flags) \
+ ((~(v) & (flags)) == 0)
+
+#define CASE_F(X) case X:
+#define CASE_F_1(CASE, X) CASE_F(X)
+#define CASE_F_2(CASE, X, ...) CASE(X) CASE_F_1(CASE, __VA_ARGS__)
+#define CASE_F_3(CASE, X, ...) CASE(X) CASE_F_2(CASE, __VA_ARGS__)
+#define CASE_F_4(CASE, X, ...) CASE(X) CASE_F_3(CASE, __VA_ARGS__)
+#define CASE_F_5(CASE, X, ...) CASE(X) CASE_F_4(CASE, __VA_ARGS__)
+#define CASE_F_6(CASE, X, ...) CASE(X) CASE_F_5(CASE, __VA_ARGS__)
+#define CASE_F_7(CASE, X, ...) CASE(X) CASE_F_6(CASE, __VA_ARGS__)
+#define CASE_F_8(CASE, X, ...) CASE(X) CASE_F_7(CASE, __VA_ARGS__)
+#define CASE_F_9(CASE, X, ...) CASE(X) CASE_F_8(CASE, __VA_ARGS__)
+#define CASE_F_10(CASE, X, ...) CASE(X) CASE_F_9(CASE, __VA_ARGS__)
+#define CASE_F_11(CASE, X, ...) CASE(X) CASE_F_10(CASE, __VA_ARGS__)
+#define CASE_F_12(CASE, X, ...) CASE(X) CASE_F_11(CASE, __VA_ARGS__)
+#define CASE_F_13(CASE, X, ...) CASE(X) CASE_F_12(CASE, __VA_ARGS__)
+#define CASE_F_14(CASE, X, ...) CASE(X) CASE_F_13(CASE, __VA_ARGS__)
+#define CASE_F_15(CASE, X, ...) CASE(X) CASE_F_14(CASE, __VA_ARGS__)
+#define CASE_F_16(CASE, X, ...) CASE(X) CASE_F_15(CASE, __VA_ARGS__)
+#define CASE_F_17(CASE, X, ...) CASE(X) CASE_F_16(CASE, __VA_ARGS__)
+#define CASE_F_18(CASE, X, ...) CASE(X) CASE_F_17(CASE, __VA_ARGS__)
+#define CASE_F_19(CASE, X, ...) CASE(X) CASE_F_18(CASE, __VA_ARGS__)
+#define CASE_F_20(CASE, X, ...) CASE(X) CASE_F_19(CASE, __VA_ARGS__)
+
+#define GET_CASE_F(_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12,_13,_14,_15,_16,_17,_18,_19,_20,NAME,...) NAME
+#define FOR_EACH_MAKE_CASE(...) \
+ GET_CASE_F(__VA_ARGS__,CASE_F_20,CASE_F_19,CASE_F_18,CASE_F_17,CASE_F_16,CASE_F_15,CASE_F_14,CASE_F_13,CASE_F_12,CASE_F_11, \
+ CASE_F_10,CASE_F_9,CASE_F_8,CASE_F_7,CASE_F_6,CASE_F_5,CASE_F_4,CASE_F_3,CASE_F_2,CASE_F_1) \
+ (CASE_F,__VA_ARGS__)
+
+#define IN_SET(x, ...) \
+ ({ \
+ bool _found = false; \
+ /* If the build breaks in the line below, you need to extend the case macros. (We use "long double" as \
+ * type for the array, in the hope that checkers such as ubsan don't complain that the initializers for \
+ * the array are not representable by the base type. Ideally we'd use typeof(x) as base type, but that \
+ * doesn't work, as we want to use this on bitfields and gcc refuses typeof() on bitfields.) */ \
+ assert_cc((sizeof((long double[]){__VA_ARGS__})/sizeof(long double)) <= 20); \
+ switch(x) { \
+ FOR_EACH_MAKE_CASE(__VA_ARGS__) \
+ _found = true; \
+ break; \
+ default: \
+ break; \
+ } \
+ _found; \
+ })
+
+#define SWAP_TWO(x, y) do { \
+ typeof(x) _t = (x); \
+ (x) = (y); \
+ (y) = (_t); \
+ } while (false)
+
+/* Define C11 thread_local attribute even on older gcc compiler
+ * version */
+#ifndef thread_local
+/*
+ * Don't break on glibc < 2.16 that doesn't define __STDC_NO_THREADS__
+ * see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53769
+ */
+#if __STDC_VERSION__ >= 201112L && !(defined(__STDC_NO_THREADS__) || (defined(__GNU_LIBRARY__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16))
+#define thread_local _Thread_local
+#else
+#define thread_local __thread
+#endif
+#endif
+
+#define DEFINE_TRIVIAL_DESTRUCTOR(name, type, func) \
+ static inline void name(type *p) { \
+ func(p); \
+ }
+
+#define DEFINE_TRIVIAL_CLEANUP_FUNC(type, func) \
+ static inline void func##p(type *p) { \
+ if (*p) \
+ func(*p); \
+ }
+
+#define _DEFINE_TRIVIAL_REF_FUNC(type, name, scope) \
+ scope type *name##_ref(type *p) { \
+ if (!p) \
+ return NULL; \
+ \
+ assert(p->n_ref > 0); \
+ p->n_ref++; \
+ return p; \
+ }
+
+#define _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, scope) \
+ scope type *name##_unref(type *p) { \
+ if (!p) \
+ return NULL; \
+ \
+ assert(p->n_ref > 0); \
+ p->n_ref--; \
+ if (p->n_ref > 0) \
+ return NULL; \
+ \
+ return free_func(p); \
+ }
+
+#define DEFINE_TRIVIAL_REF_FUNC(type, name) \
+ _DEFINE_TRIVIAL_REF_FUNC(type, name,)
+#define DEFINE_PRIVATE_TRIVIAL_REF_FUNC(type, name) \
+ _DEFINE_TRIVIAL_REF_FUNC(type, name, static)
+#define DEFINE_PUBLIC_TRIVIAL_REF_FUNC(type, name) \
+ _DEFINE_TRIVIAL_REF_FUNC(type, name, _public_)
+
+#define DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func) \
+ _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func,)
+#define DEFINE_PRIVATE_TRIVIAL_UNREF_FUNC(type, name, free_func) \
+ _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, static)
+#define DEFINE_PUBLIC_TRIVIAL_UNREF_FUNC(type, name, free_func) \
+ _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, _public_)
+
+#define DEFINE_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \
+ DEFINE_TRIVIAL_REF_FUNC(type, name); \
+ DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func);
+
+#define DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \
+ DEFINE_PRIVATE_TRIVIAL_REF_FUNC(type, name); \
+ DEFINE_PRIVATE_TRIVIAL_UNREF_FUNC(type, name, free_func);
+
+#define DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \
+ DEFINE_PUBLIC_TRIVIAL_REF_FUNC(type, name); \
+ DEFINE_PUBLIC_TRIVIAL_UNREF_FUNC(type, name, free_func);
+
+#include "log.h"
diff --git a/src/basic/memfd-util.c b/src/basic/memfd-util.c
new file mode 100644
index 0000000..f88f0fc
--- /dev/null
+++ b/src/basic/memfd-util.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#if HAVE_LINUX_MEMFD_H
+#include <linux/memfd.h>
+#endif
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "memfd-util.h"
+#include "missing.h"
+#include "string-util.h"
+#include "utf8.h"
+
+/* Creates a new memfd with sealing allowed and close-on-exec set. If name is NULL a name is generated from the
+ * calling thread's name. Returns the new fd (>= 0) on success, -ENOMEM if generating the name fails, or -errno
+ * if memfd_create() fails. */
+int memfd_new(const char *name) {
+ /* Declared at function scope so that a generated name stays around until memfd_create() below has run */
+ _cleanup_free_ char *g = NULL;
+ int fd;
+
+ if (!name) {
+ /* Zero-initialized, so the buffer reads as an empty string if nothing is written into it */
+ char pr[17] = {};
+
+ /* If no name is specified we generate one. We include
+ * a hint indicating our library implementation, and
+ * add the thread name to it */
+
+ assert_se(prctl(PR_GET_NAME, (unsigned long) pr) >= 0);
+
+ if (isempty(pr))
+ name = "sd";
+ else {
+ _cleanup_free_ char *e = NULL;
+
+ /* presumably replaces invalid UTF-8 sequences in the thread name -- see utf8.h */
+ e = utf8_escape_invalid(pr);
+ if (!e)
+ return -ENOMEM;
+
+ g = strappend("sd-", e);
+ if (!g)
+ return -ENOMEM;
+
+ name = g;
+ }
+ }
+
+ fd = memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+/* Maps size bytes of the memfd, starting at offset, into memory and stores the address in *p. A sealed memfd is
+ * mapped read-only and private, an unsealed one read-write and shared. Returns 0 on success, the negative error
+ * from memfd_get_sealed() if the seal state cannot be determined, or -errno if mmap() fails. */
+int memfd_map(int fd, uint64_t offset, size_t size, void **p) {
+        int is_sealed, prot, flags;
+        void *addr;
+
+        assert(fd >= 0);
+        assert(size > 0);
+        assert(p);
+
+        is_sealed = memfd_get_sealed(fd);
+        if (is_sealed < 0)
+                return is_sealed;
+
+        /* Pick protection and sharing mode according to the seal state */
+        prot = is_sealed ? PROT_READ : PROT_READ | PROT_WRITE;
+        flags = is_sealed ? MAP_PRIVATE : MAP_SHARED;
+
+        addr = mmap(NULL, size, prot, flags, fd, offset);
+        if (addr == MAP_FAILED)
+                return -errno;
+
+        *p = addr;
+        return 0;
+}
+
+/* Adds the shrink, grow, write and seal seals to the memfd. Returns 0 on success, -errno on failure. */
+int memfd_set_sealed(int fd) {
+        assert(fd >= 0);
+
+        if (fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL) < 0)
+                return -errno;
+
+        return 0;
+}
+
+/* Checks whether the memfd is fully sealed, i.e. whether all of the shrink, grow, write and seal seals are set.
+ * Returns 1 if so, 0 if at least one of them is missing, and -errno if the seals cannot be queried.
+ *
+ * Additional seals on the fd do not make it any less sealed, hence the four seals are tested as a subset of the
+ * reported ones rather than compared for exact equality. */
+int memfd_get_sealed(int fd) {
+        const int required = F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL;
+        int r;
+
+        assert(fd >= 0);
+
+        r = fcntl(fd, F_GET_SEALS);
+        if (r < 0)
+                return -errno;
+
+        return (r & required) == required;
+}
+
+/* Queries the current size of the memfd via fstat() and stores it in *sz. Returns 0 on success, -errno on
+ * failure (in which case *sz is left untouched). */
+int memfd_get_size(int fd, uint64_t *sz) {
+        struct stat st;
+
+        assert(fd >= 0);
+        assert(sz);
+
+        if (fstat(fd, &st) < 0)
+                return -errno;
+
+        *sz = st.st_size;
+        return 0;
+}
+
+/* Resizes the memfd to sz bytes via ftruncate(). Returns 0 on success, -errno on failure. */
+int memfd_set_size(int fd, uint64_t sz) {
+        assert(fd >= 0);
+
+        if (ftruncate(fd, sz) < 0)
+                return -errno;
+
+        return 0;
+}
+
+/* Convenience wrapper: creates a new memfd (see memfd_new() for how name is handled), resizes it to sz bytes and
+ * maps it, storing the mapping address in *p. Returns the memfd (>= 0) on success, or the negative error of the
+ * first step that failed. */
+int memfd_new_and_map(const char *name, size_t sz, void **p) {
+ /* presumably closed automatically on the error paths below via the cleanup attribute -- see fd-util.h */
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(sz > 0);
+ assert(p);
+
+ fd = memfd_new(name);
+ if (fd < 0)
+ return fd;
+
+ r = memfd_set_size(fd, sz);
+ if (r < 0)
+ return r;
+
+ r = memfd_map(fd, 0, sz, p);
+ if (r < 0)
+ return r;
+
+ /* presumably hands the fd over to the caller and disarms the automatic close -- TODO confirm in fd-util.h */
+ return TAKE_FD(fd);
+}
diff --git a/src/basic/memfd-util.h b/src/basic/memfd-util.h
new file mode 100644
index 0000000..5ebb519
--- /dev/null
+++ b/src/basic/memfd-util.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+int memfd_new(const char *name); /* creates a sealable, close-on-exec memfd; returns the fd or -errno */
+int memfd_new_and_map(const char *name, size_t sz, void **p); /* memfd_new() + memfd_set_size() + memfd_map(); returns the fd, mapping stored in *p */
+
+int memfd_map(int fd, uint64_t offset, size_t size, void **p); /* read-only private mapping if sealed, read-write shared otherwise; 0 or negative error */
+
+int memfd_set_sealed(int fd); /* adds F_SEAL_SHRINK, F_SEAL_GROW, F_SEAL_WRITE and F_SEAL_SEAL; 0 or -errno */
+int memfd_get_sealed(int fd); /* non-zero if the memfd is sealed, 0 if not, -errno on failure */
+
+int memfd_get_size(int fd, uint64_t *sz); /* size taken from fstat(); 0 or -errno */
+int memfd_set_size(int fd, uint64_t sz); /* resizes with ftruncate(); 0 or -errno */
diff --git a/src/basic/mempool.c b/src/basic/mempool.c
new file mode 100644
index 0000000..159c963
--- /dev/null
+++ b/src/basic/mempool.c
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "env-util.h"
+#include "macro.h"
+#include "mempool.h"
+#include "process-util.h"
+#include "util.h"
+
+struct pool { /* header at the start of each malloc()ed pool; tiles start at offset ALIGN(sizeof(struct pool)) */
+ struct pool *next; /* the pool that was first_pool before this one was allocated, or NULL */
+ size_t n_tiles; /* number of tiles that fit into this pool's allocation */
+ size_t n_used; /* number of tiles handed out from this pool so far */
+};
+
+/* Hands out one uninitialized tile of mp->tile_size bytes, or NULL if a new pool is needed and
+ * malloc() fails. Released tiles are reused before fresh ones are carved out of a pool. */
+void* mempool_alloc_tile(struct mempool *mp) {
+        struct pool *cur;
+        size_t header, slot;
+
+        /* A released tile stores the free list's next pointer at its offset 0, hence a tile
+         * must be able to hold a pointer. */
+        assert(mp->tile_size >= sizeof(void*));
+        assert(mp->at_least > 0);
+
+        /* Fast path: pop the most recently released tile. */
+        if (mp->freelist) {
+                void *tile = mp->freelist;
+
+                mp->freelist = * (void**) tile;
+                return tile;
+        }
+
+        header = ALIGN(sizeof(struct pool));
+        cur = mp->first_pool;
+
+        if (_unlikely_(!cur) ||
+            _unlikely_(cur->n_used >= cur->n_tiles)) {
+                size_t bytes, count;
+                struct pool *fresh;
+
+                /* No pool yet, or the newest one is exhausted: allocate a pool with twice the
+                 * tiles of the previous one (at least mp->at_least), rounded up to whole pages,
+                 * and then use whatever tile count actually fits. */
+                count = cur ? cur->n_tiles : 0;
+                count = MAX(mp->at_least, count * 2);
+                bytes = PAGE_ALIGN(header + count*mp->tile_size);
+                count = (bytes - header) / mp->tile_size;
+
+                fresh = malloc(bytes);
+                if (!fresh)
+                        return NULL;
+
+                fresh->next = cur;
+                fresh->n_tiles = count;
+                fresh->n_used = 0;
+
+                mp->first_pool = fresh;
+                cur = fresh;
+        }
+
+        slot = cur->n_used++;
+
+        return ((uint8_t*) cur) + header + slot*mp->tile_size;
+}
+
+/* Like mempool_alloc_tile(), but the returned tile is zero-filled. Returns NULL on failure. */
+void* mempool_alloc0_tile(struct mempool *mp) {
+        void *tile = mempool_alloc_tile(mp);
+
+        if (!tile)
+                return NULL;
+
+        memzero(tile, mp->tile_size);
+        return tile;
+}
+
+/* Returns tile `p` to the pool by pushing it onto the head of mp's free list. */
+void mempool_free_tile(struct mempool *mp, void *p) {
+        void **link = p;
+
+        /* The start of the released tile is reused to store the previous list head. */
+        *link = mp->freelist;
+        mp->freelist = p;
+}
+
+/* Tells whether memory pools may be used by the caller. Always false outside the main thread. */
+bool mempool_enabled(void) {
+        /* < 0 means "not determined yet"; otherwise the remembered answer. */
+        static int cached = -1;
+
+        if (!is_main_thread())
+                return false;
+
+        if (!mempool_use_allowed)
+                cached = false;
+        else if (cached < 0)
+                /* Only a getenv_bool() result of exactly 0 turns the pools off. */
+                cached = getenv_bool("SYSTEMD_MEMPOOL") != 0;
+
+        return cached;
+}
+
+#if VALGRIND
+/* Frees every pool chained off mp->first_pool. The fields of *mp itself are left untouched. */
+void mempool_drop(struct mempool *mp) {
+        struct pool *cur, *following;
+
+        for (cur = mp->first_pool; cur; cur = following) {
+                /* Fetch the successor before the pool holding it is freed. */
+                following = cur->next;
+                free(cur);
+        }
+}
+#endif
diff --git a/src/basic/mempool.h b/src/basic/mempool.h
new file mode 100644
index 0000000..0eecca0
--- /dev/null
+++ b/src/basic/mempool.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+
+struct pool;
+
+struct mempool { /* allocator state for fixed-size tiles; instances are defined with DEFINE_MEMPOOL() */
+ struct pool *first_pool; /* most recently allocated pool; new tiles are carved out of it */
+ void *freelist; /* head of the list of released tiles, NULL if there are none */
+ size_t tile_size; /* size of one tile in bytes */
+ unsigned at_least; /* minimum number of tiles requested when a new pool is allocated */
+};
+
+void* mempool_alloc_tile(struct mempool *mp); /* returns one uninitialized tile, or NULL on allocation failure */
+void* mempool_alloc0_tile(struct mempool *mp); /* same, but the tile is zero-filled */
+void mempool_free_tile(struct mempool *mp, void *p); /* puts tile p back on mp's free list */
+
+#define DEFINE_MEMPOOL(pool_name, tile_type, alloc_at_least) \
+static struct mempool pool_name = { \
+ .tile_size = sizeof(tile_type), \
+ .at_least = alloc_at_least, \
+} /* defines a file-local mempool for tiles of tile_type; first_pool and freelist start out NULL */
+
+extern const bool mempool_use_allowed; /* only declared here; when false, mempool_enabled() reports false */
+bool mempool_enabled(void); /* whether the pools may be used; always false outside the main thread */
+
+#if VALGRIND
+void mempool_drop(struct mempool *mp);
+#endif
diff --git a/src/basic/meson.build b/src/basic/meson.build
new file mode 100644
index 0000000..e5852f3
--- /dev/null
+++ b/src/basic/meson.build
@@ -0,0 +1,310 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+basic_sources = files('''
+ MurmurHash2.c
+ MurmurHash2.h
+ af-list.c
+ af-list.h
+ alloc-util.c
+ alloc-util.h
+ architecture.c
+ architecture.h
+ arphrd-list.c
+ arphrd-list.h
+ async.c
+ async.h
+ audit-util.c
+ audit-util.h
+ blockdev-util.c
+ blockdev-util.h
+ btrfs-util.c
+ btrfs-util.h
+ build.h
+ bus-label.c
+ bus-label.h
+ cap-list.c
+ cap-list.h
+ capability-util.c
+ capability-util.h
+ cgroup-util.c
+ cgroup-util.h
+ chattr-util.c
+ chattr-util.h
+ conf-files.c
+ conf-files.h
+ copy.c
+ copy.h
+ def.h
+ device-nodes.c
+ device-nodes.h
+ dirent-util.c
+ dirent-util.h
+ env-file.c
+ env-file.h
+ env-util.c
+ env-util.h
+ errno-list.c
+ errno-list.h
+ escape.c
+ escape.h
+ ether-addr-util.c
+ ether-addr-util.h
+ extract-word.c
+ extract-word.h
+ fd-util.c
+ fd-util.h
+ fileio.c
+ fileio.h
+ format-util.h
+ fs-util.c
+ fs-util.h
+ glob-util.c
+ glob-util.h
+ gunicode.c
+ gunicode.h
+ hash-funcs.c
+ hash-funcs.h
+ hashmap.c
+ hashmap.h
+ hexdecoct.c
+ hexdecoct.h
+ hostname-util.c
+ hostname-util.h
+ in-addr-util.c
+ in-addr-util.h
+ io-util.c
+ io-util.h
+ ioprio.h
+ khash.c
+ khash.h
+ label.c
+ label.h
+ list.h
+ locale-util.c
+ locale-util.h
+ log.c
+ log.h
+ login-util.c
+ login-util.h
+ macro.h
+ memfd-util.c
+ memfd-util.h
+ mempool.c
+ mempool.h
+ missing.h
+ missing_audit.h
+ missing_btrfs.h
+ missing_btrfs_tree.h
+ missing_capability.h
+ missing_drm.h
+ missing_ethtool.h
+ missing_fcntl.h
+ missing_fib_rules.h
+ missing_fou.h
+ missing_fs.h
+ missing_if_bridge.h
+ missing_if_link.h
+ missing_if_tunnel.h
+ missing_input.h
+ missing_keyctl.h
+ missing_magic.h
+ missing_mman.h
+ missing_network.h
+ missing_prctl.h
+ missing_random.h
+ missing_resource.h
+ missing_sched.h
+ missing_securebits.h
+ missing_socket.h
+ missing_stat.h
+ missing_stdlib.h
+ missing_syscall.h
+ missing_timerfd.h
+ missing_type.h
+ missing_vxcan.h
+ mkdir-label.c
+ mkdir.c
+ mkdir.h
+ mountpoint-util.c
+ mountpoint-util.h
+ nss-util.h
+ ordered-set.c
+ ordered-set.h
+ parse-util.c
+ parse-util.h
+ path-util.c
+ path-util.h
+ prioq.c
+ prioq.h
+ proc-cmdline.c
+ proc-cmdline.h
+ process-util.c
+ process-util.h
+ procfs-util.c
+ procfs-util.h
+ random-util.c
+ random-util.h
+ ratelimit.c
+ ratelimit.h
+ raw-clone.h
+ raw-reboot.h
+ refcnt.h
+ replace-var.c
+ replace-var.h
+ rlimit-util.c
+ rlimit-util.h
+ rm-rf.c
+ rm-rf.h
+ selinux-util.c
+ selinux-util.h
+ set.h
+ sigbus.c
+ sigbus.h
+ signal-util.c
+ signal-util.h
+ siphash24.c
+ siphash24.h
+ smack-util.c
+ smack-util.h
+ socket-label.c
+ socket-util.c
+ socket-util.h
+ sparse-endian.h
+ special.h
+ stat-util.c
+ stat-util.h
+ static-destruct.h
+ stdio-util.h
+ strbuf.c
+ strbuf.h
+ string-table.c
+ string-table.h
+ string-util.c
+ string-util.h
+ strv.c
+ strv.h
+ strxcpyx.c
+ strxcpyx.h
+ syslog-util.c
+ syslog-util.h
+ terminal-util.c
+ terminal-util.h
+ time-util.c
+ time-util.h
+ tmpfile-util.c
+ tmpfile-util.h
+ umask-util.h
+ unaligned.h
+ unit-def.c
+ unit-def.h
+ unit-name.c
+ unit-name.h
+ user-util.c
+ user-util.h
+ utf8.c
+ utf8.h
+ util.c
+ util.h
+ virt.c
+ virt.h
+ xattr-util.c
+ xattr-util.h
+'''.split())
+
+missing_audit_h = files('missing_audit.h') # file objects for headers; three of them are passed to the list generators below
+missing_capability_h = files('missing_capability.h')
+missing_network_h = files('missing_network.h')
+missing_socket_h = files('missing_socket.h')
+
+generate_af_list = find_program('generate-af-list.sh') # script whose stdout becomes af-list.txt
+af_list_txt = custom_target(
+ 'af-list.txt',
+ output : 'af-list.txt',
+ command : [generate_af_list, cpp, config_h, missing_socket_h],
+ capture : true) # capture: the command's stdout is written to the output file
+
+generate_arphrd_list = find_program('generate-arphrd-list.sh') # script whose stdout becomes arphrd-list.txt
+arphrd_list_txt = custom_target(
+ 'arphrd-list.txt',
+ output : 'arphrd-list.txt',
+ command : [generate_arphrd_list, cpp, config_h, missing_network_h],
+ capture : true)
+
+generate_cap_list = find_program('generate-cap-list.sh') # script whose stdout becomes cap-list.txt
+cap_list_txt = custom_target(
+ 'cap-list.txt',
+ output : 'cap-list.txt',
+ command : [generate_cap_list, cpp, config_h, missing_capability_h],
+ capture : true)
+
+generate_errno_list = find_program('generate-errno-list.sh') # script whose stdout becomes errno-list.txt
+errno_list_txt = custom_target(
+ 'errno-list.txt',
+ output : 'errno-list.txt',
+ command : [generate_errno_list, cpp], # unlike the others, gets neither config_h nor a missing_*.h header
+ capture : true)
+
+generated_gperf_headers = [] # collects the two generated headers per list
+foreach item : [['af', af_list_txt, 'af', ''], # item = [file name prefix, list target, name for generate_gperfs and the gperf symbols, extra generate_gperfs argument]
+ ['arphrd', arphrd_list_txt, 'arphrd', 'ARPHRD_'],
+ ['cap', cap_list_txt, 'capability', ''],
+ ['errno', errno_list_txt, 'errno', '']]
+
+ fname = '@0@-from-name.gperf'.format(item[0]) # step 1: list -> gperf input file
+ gperf_file = custom_target(
+ fname,
+ input : item[1],
+ output : fname,
+ command : [generate_gperfs, item[2], item[3], '@INPUT@'],
+ capture : true)
+
+ fname = '@0@-from-name.h'.format(item[0]) # step 2: gperf input -> <prefix>-from-name.h
+ target1 = custom_target(
+ fname,
+ input : gperf_file,
+ output : fname,
+ command : [gperf,
+ '-L', 'ANSI-C', '-t', '--ignore-case',
+ '-N', 'lookup_@0@'.format(item[2]), # generated lookup function: lookup_<name>
+ '-H', 'hash_@0@_name'.format(item[2]), # generated hash function: hash_<name>_name
+ '-p', '-C',
+ '@INPUT@'],
+ capture : true)
+
+ fname = '@0@-to-name.h'.format(item[0]) # step 3: list -> <prefix>-to-name.h via the matching awk script
+ awkscript = '@0@-to-name.awk'.format(item[0])
+ target2 = custom_target(
+ fname,
+ input : [awkscript, item[1]],
+ output : fname,
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+ generated_gperf_headers += [target1, target2]
+endforeach
+
+basic_sources += generated_gperf_headers # generated headers become sources of libbasic
+basic_gcrypt_sources = files(
+ 'gcrypt-util.c',
+ 'gcrypt-util.h')
+
+libbasic = static_library(
+ 'basic',
+ basic_sources,
+ include_directories : includes,
+ dependencies : [versiondep,
+ threads,
+ libcap,
+ libselinux,
+ libm],
+ c_args : ['-fvisibility=default'],
+ install : false) # static library that is not installed
+
+# A convenience library that is separate from libbasic to avoid
+# unnecessary linking to libgcrypt.
+libbasic_gcrypt = static_library(
+ 'basic-gcrypt',
+ basic_gcrypt_sources,
+ include_directories : includes,
+ dependencies : [libgcrypt], # the only target in this file that depends on libgcrypt
+ c_args : ['-fvisibility=default'])
diff --git a/src/basic/missing.h b/src/basic/missing.h
new file mode 100644
index 0000000..5067c8f
--- /dev/null
+++ b/src/basic/missing.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* Missing glibc definitions to access certain kernel APIs */
+
+#include "missing_audit.h"
+#include "missing_btrfs_tree.h"
+#include "missing_capability.h"
+#include "missing_drm.h"
+#include "missing_fcntl.h"
+#include "missing_fs.h"
+#include "missing_input.h"
+#include "missing_magic.h"
+#include "missing_mman.h"
+#include "missing_network.h"
+#include "missing_prctl.h"
+#include "missing_random.h"
+#include "missing_resource.h"
+#include "missing_sched.h"
+#include "missing_socket.h"
+#include "missing_stdlib.h"
+#include "missing_timerfd.h"
+#include "missing_type.h"
+
+#include "missing_syscall.h"
diff --git a/src/basic/missing_audit.h b/src/basic/missing_audit.h
new file mode 100644
index 0000000..b00d537
--- /dev/null
+++ b/src/basic/missing_audit.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/audit.h>
+
+#if HAVE_AUDIT
+#include <libaudit.h>
+#endif
+
+#ifndef AUDIT_SERVICE_START /* fallback, used only if the headers included above do not define it */
+#define AUDIT_SERVICE_START 1130 /* Service (daemon) start */
+#endif
+
+#ifndef AUDIT_SERVICE_STOP /* fallback, as above */
+#define AUDIT_SERVICE_STOP 1131 /* Service (daemon) stop */
+#endif
+
+#ifndef MAX_AUDIT_MESSAGE_LENGTH /* fallback, as above */
+#define MAX_AUDIT_MESSAGE_LENGTH 8970
+#endif
+
+#ifndef AUDIT_NLGRP_MAX /* NOTE(review): the guard tests AUDIT_NLGRP_MAX, not AUDIT_NLGRP_READLOG; presumably READLOG is an enum constant in the kernel header and cannot be tested with #ifndef — confirm */
+#define AUDIT_NLGRP_READLOG 1
+#endif
diff --git a/src/basic/missing_btrfs.h b/src/basic/missing_btrfs.h
new file mode 100644
index 0000000..34c382f
--- /dev/null
+++ b/src/basic/missing_btrfs.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* Old btrfs.h requires stddef.h to be included before btrfs.h */
+#include <stddef.h>
+
+#include <linux/btrfs.h>
+
+/* linux@57254b6ebce4ceca02d9c8b615f6059c56c19238 (3.11) */
+#ifndef BTRFS_IOC_QUOTA_RESCAN_WAIT /* fallback for <linux/btrfs.h> versions lacking this ioctl */
+#define BTRFS_IOC_QUOTA_RESCAN_WAIT _IO(BTRFS_IOCTL_MAGIC, 46)
+#endif
+
+/* linux@83288b60bf6668933689078973136e0c9d387b38 (4.7) */
+#ifndef BTRFS_QGROUP_LIMIT_MAX_RFER /* this one guard covers all six flags below */
+#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0)
+#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1)
+#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2)
+#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3)
+#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4)
+#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5)
+#endif
diff --git a/src/basic/missing_btrfs_tree.h b/src/basic/missing_btrfs_tree.h
new file mode 100644
index 0000000..555f90f
--- /dev/null
+++ b/src/basic/missing_btrfs_tree.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/types.h>
+
+#include "missing_btrfs.h"
+
+/* linux@db6711600e27c885aed89751f04e727f3af26715 (4.7) */
+#if HAVE_LINUX_BTRFS_TREE_H
+#include <linux/btrfs_tree.h>
+#else
+#define BTRFS_ROOT_TREE_OBJECTID 1 /* fallbacks from here to the matching #endif apply only without <linux/btrfs_tree.h> */
+#define BTRFS_QUOTA_TREE_OBJECTID 8
+#define BTRFS_FIRST_FREE_OBJECTID 256
+#define BTRFS_LAST_FREE_OBJECTID -256ULL /* unsigned: expands to the negation of 256ULL and is not parenthesized */
+
+#define BTRFS_ROOT_ITEM_KEY 132 /* key type values */
+#define BTRFS_ROOT_BACKREF_KEY 144
+#define BTRFS_QGROUP_STATUS_KEY 240
+#define BTRFS_QGROUP_INFO_KEY 242
+#define BTRFS_QGROUP_LIMIT_KEY 244
+#define BTRFS_QGROUP_RELATION_KEY 246
+
+struct btrfs_disk_key { /* fallback definition, compiled only without <linux/btrfs_tree.h> */
+ __le64 objectid;
+ __u8 type;
+ __le64 offset;
+} __attribute__ ((__packed__)); /* packed: no padding after the one-byte type member */
+
+struct btrfs_timespec { /* fallback definition, compiled only without <linux/btrfs_tree.h> */
+ __le64 sec;
+ __le32 nsec;
+} __attribute__ ((__packed__)); /* packed: no tail padding after the 32-bit nsec member */
+
+struct btrfs_inode_item { /* fallback definition, compiled only without <linux/btrfs_tree.h> */
+ __le64 generation;
+ __le64 transid;
+ __le64 size;
+ __le64 nbytes;
+ __le64 block_group;
+ __le32 nlink;
+ __le32 uid;
+ __le32 gid;
+ __le32 mode;
+ __le64 rdev;
+ __le64 flags;
+ __le64 sequence;
+ __le64 reserved[4];
+ struct btrfs_timespec atime; /* four timestamps, each a packed btrfs_timespec */
+ struct btrfs_timespec ctime;
+ struct btrfs_timespec mtime;
+ struct btrfs_timespec otime;
+} __attribute__ ((__packed__)); /* packed: members are laid out without padding */
+
+#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
+
+struct btrfs_root_item { /* fallback definition, compiled only without <linux/btrfs_tree.h> */
+ struct btrfs_inode_item inode; /* embedded by value, not a pointer */
+ __le64 generation;
+ __le64 root_dirid;
+ __le64 bytenr;
+ __le64 byte_limit;
+ __le64 bytes_used;
+ __le64 last_snapshot;
+ __le64 flags; /* NOTE(review): presumably where BTRFS_ROOT_SUBVOL_RDONLY, defined just above, is stored — confirm */
+ __le32 refs;
+ struct btrfs_disk_key drop_progress;
+ __u8 drop_level;
+ __u8 level;
+
+ __le64 generation_v2;
+ __u8 uuid[BTRFS_UUID_SIZE]; /* BTRFS_UUID_SIZE is not defined in this header */
+ __u8 parent_uuid[BTRFS_UUID_SIZE];
+ __u8 received_uuid[BTRFS_UUID_SIZE];
+ __le64 ctransid; /* updated when an inode changes */
+ __le64 otransid; /* trans when created */
+ __le64 stransid; /* trans when sent. non-zero for received subvol */
+ __le64 rtransid; /* trans when received. non-zero for received subvol */
+ struct btrfs_timespec ctime;
+ struct btrfs_timespec otime;
+ struct btrfs_timespec stime;
+ struct btrfs_timespec rtime;
+ __le64 reserved[8]; /* for future */
+} __attribute__ ((__packed__)); /* packed: members are laid out without padding */
+
+struct btrfs_root_ref { /* fallback definition, compiled only without <linux/btrfs_tree.h> */
+ __le64 dirid;
+ __le64 sequence;
+ __le16 name_len; /* NOTE(review): presumably the length of a name stored after this struct — confirm */
+} __attribute__ ((__packed__)); /* packed: no tail padding after the 16-bit name_len member */
+
+#define BTRFS_QGROUP_LEVEL_SHIFT 48
+
+struct btrfs_qgroup_info_item { /* fallback definition, compiled only without <linux/btrfs_tree.h> */
+ __le64 generation;
+ __le64 rfer;
+ __le64 rfer_cmpr;
+ __le64 excl;
+ __le64 excl_cmpr;
+} __attribute__ ((__packed__)); /* five 64-bit members, packed */
+
+struct btrfs_qgroup_limit_item { /* fallback definition, compiled only without <linux/btrfs_tree.h> */
+ __le64 flags; /* NOTE(review): presumably a mask of the BTRFS_QGROUP_LIMIT_* bits — confirm */
+ __le64 max_rfer;
+ __le64 max_excl;
+ __le64 rsv_rfer;
+ __le64 rsv_excl;
+} __attribute__ ((__packed__)); /* five 64-bit members, packed */
+#endif
diff --git a/src/basic/missing_capability.h b/src/basic/missing_capability.h
new file mode 100644
index 0000000..1308a3d
--- /dev/null
+++ b/src/basic/missing_capability.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/capability.h>
+
+/* 3a101b8de0d39403b2c7e5c23fd0b005668acf48 (3.16) */
+#ifndef CAP_AUDIT_READ /* fallback for <linux/capability.h> versions without CAP_AUDIT_READ */
+#define CAP_AUDIT_READ 37
+
+#undef CAP_LAST_CAP /* drop the header's value so the highest capability becomes the one just added */
+#define CAP_LAST_CAP CAP_AUDIT_READ
+#endif
diff --git a/src/basic/missing_drm.h b/src/basic/missing_drm.h
new file mode 100644
index 0000000..a64f74e
--- /dev/null
+++ b/src/basic/missing_drm.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#ifndef DRM_IOCTL_SET_MASTER /* NOTE(review): this header includes nothing itself, so _IO() must be provided by whoever includes it — confirm */
+#define DRM_IOCTL_SET_MASTER _IO('d', 0x1e)
+#endif
+
+#ifndef DRM_IOCTL_DROP_MASTER /* fallback, as above */
+#define DRM_IOCTL_DROP_MASTER _IO('d', 0x1f)
+#endif
diff --git a/src/basic/missing_ethtool.h b/src/basic/missing_ethtool.h
new file mode 100644
index 0000000..9ba929c
--- /dev/null
+++ b/src/basic/missing_ethtool.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/types.h>
+
+/* Missing definitions in ethtool.h */
+
+#if !HAVE_ETHTOOL_LINK_MODE_10baseT_Half_BIT /* linux@3f1ac7a700d039c61d8d8b99f28d605d489a60cf (4.6) */
+
+#define ETHTOOL_GLINKSETTINGS 0x0000004c /* Get ethtool_link_settings */
+#define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */
+
+struct ethtool_link_settings { /* fallback definition, compiled only if HAVE_ETHTOOL_LINK_MODE_10baseT_Half_BIT is unset */
+ __u32 cmd;
+ __u32 speed;
+ __u8 duplex;
+ __u8 port;
+ __u8 phy_address;
+ __u8 autoneg;
+ __u8 mdio_support;
+ __u8 eth_tp_mdix;
+ __u8 eth_tp_mdix_ctrl;
+ __s8 link_mode_masks_nwords; /* signed; number of __u32 words per map in link_mode_masks, see layout comment below */
+ __u8 transceiver;
+ __u8 reserved1[3];
+ __u32 reserved[7];
+ __u32 link_mode_masks[0]; /* zero-length trailing array: the three maps follow the fixed part of the struct */
+ /* layout of link_mode_masks fields:
+ * __u32 map_supported[link_mode_masks_nwords];
+ * __u32 map_advertising[link_mode_masks_nwords];
+ * __u32 map_lp_advertising[link_mode_masks_nwords];
+ */
+};
+
+enum ethtool_link_mode_bit_indices {
+ ETHTOOL_LINK_MODE_10baseT_Half_BIT = 0,
+ ETHTOOL_LINK_MODE_10baseT_Full_BIT = 1,
+ ETHTOOL_LINK_MODE_100baseT_Half_BIT = 2,
+ ETHTOOL_LINK_MODE_100baseT_Full_BIT = 3,
+ ETHTOOL_LINK_MODE_1000baseT_Half_BIT = 4,
+ ETHTOOL_LINK_MODE_1000baseT_Full_BIT = 5,
+ ETHTOOL_LINK_MODE_Autoneg_BIT = 6,
+ ETHTOOL_LINK_MODE_TP_BIT = 7,
+ ETHTOOL_LINK_MODE_AUI_BIT = 8,
+ ETHTOOL_LINK_MODE_MII_BIT = 9,
+ ETHTOOL_LINK_MODE_FIBRE_BIT = 10,
+ ETHTOOL_LINK_MODE_BNC_BIT = 11,
+ ETHTOOL_LINK_MODE_10000baseT_Full_BIT = 12,
+ ETHTOOL_LINK_MODE_Pause_BIT = 13,
+ ETHTOOL_LINK_MODE_Asym_Pause_BIT = 14,
+ ETHTOOL_LINK_MODE_2500baseX_Full_BIT = 15,
+ ETHTOOL_LINK_MODE_Backplane_BIT = 16,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT = 17,
+ ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT = 18,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT = 19,
+ ETHTOOL_LINK_MODE_10000baseR_FEC_BIT = 20,
+ ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT = 21,
+ ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT = 22,
+ ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT = 23,
+ ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT = 24,
+ ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT = 25,
+ ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT = 26,
+ ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT = 27,
+ ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT = 28,
+ ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT = 29,
+ ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT = 30,
+ ETHTOOL_LINK_MODE_25000baseCR_Full_BIT = 31,
+ ETHTOOL_LINK_MODE_25000baseKR_Full_BIT = 32,
+ ETHTOOL_LINK_MODE_25000baseSR_Full_BIT = 33,
+ ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT = 34,
+ ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT = 35,
+ ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT = 36,
+ ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37,
+ ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38,
+ ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39,
+ ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40,
+ ETHTOOL_LINK_MODE_1000baseX_Full_BIT = 41,
+ ETHTOOL_LINK_MODE_10000baseCR_Full_BIT = 42,
+ ETHTOOL_LINK_MODE_10000baseSR_Full_BIT = 43,
+ ETHTOOL_LINK_MODE_10000baseLR_Full_BIT = 44,
+ ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT = 45,
+ ETHTOOL_LINK_MODE_10000baseER_Full_BIT = 46,
+ ETHTOOL_LINK_MODE_2500baseT_Full_BIT = 47,
+ ETHTOOL_LINK_MODE_5000baseT_Full_BIT = 48,
+
+ ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49,
+ ETHTOOL_LINK_MODE_FEC_RS_BIT = 50,
+ ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51,
+
+ /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
+ * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_*
+ * macro for bits > 31. The only way to use indices > 31 is to
+ * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API.
+ */
+
+ __ETHTOOL_LINK_MODE_LAST
+ = ETHTOOL_LINK_MODE_FEC_BASER_BIT,
+};
+#else
+#if !HAVE_ETHTOOL_LINK_MODE_25000baseCR_Full_BIT /* linux@3851112e4737cd52aaeda0ce8d084be9ee128106 (4.7) */
+#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
+#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
+#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
+#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
+#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
+#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
+#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
+#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
+#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
+#endif
+#if !HAVE_ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT /* linux@89da45b8b5b2187734a11038b8593714f964ffd1 (4.8) */
+#define ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT 40
+#endif
+#if !HAVE_ETHTOOL_LINK_MODE_1000baseX_Full_BIT /* linux@5711a98221443aec54c4c81ee98c6ae46acccb65 (4.9) */
+#define ETHTOOL_LINK_MODE_1000baseX_Full_BIT 41
+#define ETHTOOL_LINK_MODE_10000baseCR_Full_BIT 42
+#define ETHTOOL_LINK_MODE_10000baseSR_Full_BIT 43
+#define ETHTOOL_LINK_MODE_10000baseLR_Full_BIT 44
+#define ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT 45
+#define ETHTOOL_LINK_MODE_10000baseER_Full_BIT 46
+#endif
+#if !HAVE_ETHTOOL_LINK_MODE_2500baseT_Full_BIT /* linux@94842b4fc4d6b1691cfc86c6f5251f299d27f4ba (4.10) */
+#define ETHTOOL_LINK_MODE_2500baseT_Full_BIT 47
+#define ETHTOOL_LINK_MODE_5000baseT_Full_BIT 48
+#endif
+#if !HAVE_ETHTOOL_LINK_MODE_FEC_NONE_BIT /* linux@1a5f3da20bd966220931239fbd31e6ac6ff42251 (4.14) */
+#define ETHTOOL_LINK_MODE_FEC_NONE_BIT 49
+#define ETHTOOL_LINK_MODE_FEC_RS_BIT 50
+#define ETHTOOL_LINK_MODE_FEC_BASER_BIT 51
+#endif
+#endif
diff --git a/src/basic/missing_fcntl.h b/src/basic/missing_fcntl.h
new file mode 100644
index 0000000..5d1c635
--- /dev/null
+++ b/src/basic/missing_fcntl.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <fcntl.h>
+
+#ifndef F_LINUX_SPECIFIC_BASE /* base value for the fcntl commands defined relative to it below */
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef F_SETPIPE_SZ /* fallback if <fcntl.h> lacks it */
+#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
+#endif
+
+#ifndef F_GETPIPE_SZ /* fallback if <fcntl.h> lacks it */
+#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
+#endif
+
+#ifndef F_ADD_SEALS /* this one guard covers both sealing commands and all four seal flags */
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+
+#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
+#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
+#define F_SEAL_GROW 0x0004 /* prevent file from growing */
+#define F_SEAL_WRITE 0x0008 /* prevent writes */
+#endif
+
+#ifndef F_OFD_GETLK /* this one guard covers all three F_OFD_* commands */
+#define F_OFD_GETLK 36
+#define F_OFD_SETLK 37
+#define F_OFD_SETLKW 38
+#endif
+
+#ifndef MAX_HANDLE_SZ /* fallback if <fcntl.h> lacks it */
+#define MAX_HANDLE_SZ 128
+#endif
+
+/* The precise definition of __O_TMPFILE is arch specific; use the
+ * values defined by the kernel (note: some are hexadecimal, some are
+ * octal, duplicated as-is from the kernel definitions):
+ * - alpha, parisc, sparc: each has a specific value;
+ * - others: they use the "generic" value.
+ */
+
+#ifndef __O_TMPFILE
+#if defined(__alpha__)
+#define __O_TMPFILE 0100000000 /* octal */
+#elif defined(__parisc__) || defined(__hppa__)
+#define __O_TMPFILE 0400000000 /* octal */
+#elif defined(__sparc__) || defined(__sparc64__)
+#define __O_TMPFILE 0x2000000 /* hexadecimal */
+#else
+#define __O_TMPFILE 020000000 /* octal; the "generic" value */
+#endif
+#endif
+
+/* a horrid kludge trying to make sure that this will fail on old kernels */
+#ifndef O_TMPFILE
+#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) /* O_DIRECTORY is ORed in as part of the kludge */
+#endif
diff --git a/src/basic/missing_fib_rules.h b/src/basic/missing_fib_rules.h
new file mode 100644
index 0000000..df120d7
--- /dev/null
+++ b/src/basic/missing_fib_rules.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/types.h>
+
+#if !HAVE_FRA_TUN_ID /* linux@e7030878fc8448492b6e5cecd574043f63271298 (4.3) */
+#define FRA_TUN_ID 12
+#endif
+
+#if !HAVE_FRA_SUPPRESS_PREFIXLEN /* linux@6ef94cfafba159d6b1a902ccb3349ac6a34ff6ad, 73f5698e77219bfc3ea1903759fe8e20ab5b285e (3.12) */
+#define FRA_SUPPRESS_IFGROUP 13 /* both attributes hang off the single HAVE_FRA_SUPPRESS_PREFIXLEN check */
+#define FRA_SUPPRESS_PREFIXLEN 14
+#endif
+
+#if !HAVE_FRA_PAD /* linux@b46f6ded906ef0be52a4881ba50a084aeca64d7e (4.7) */
+#define FRA_PAD 18
+#endif
+
+#if !HAVE_FRA_L3MDEV /* linux@96c63fa7393d0a346acfe5a91e0c7d4c7782641b (4.8) */
+#define FRA_L3MDEV 19
+#endif
+
+#if !HAVE_FRA_UID_RANGE /* linux@622ec2c9d52405973c9f1ca5116eb1c393adfc7d (4.10) */
+#define FRA_UID_RANGE 20
+
+struct fib_rule_uid_range { /* fallback definition, provided together with FRA_UID_RANGE */
+ __u32 start;
+ __u32 end;
+};
+#endif
+
+#if !HAVE_FRA_DPORT_RANGE /* linux@1b71af6053af1bd2f849e9fda4f71c1e3f145dcf, bfff4862653bb96001ab57c1edd6d03f48e5f035 (4.17) */
+#define FRA_PROTOCOL 21 /* all four attributes hang off the single HAVE_FRA_DPORT_RANGE check */
+#define FRA_IP_PROTO 22
+#define FRA_SPORT_RANGE 23
+#define FRA_DPORT_RANGE 24
+
+#undef FRA_MAX /* redefine the maximum to the highest attribute defined above */
+#define FRA_MAX 24
+
+struct fib_rule_port_range { /* fallback definition, provided together with the port range attributes */
+ __u16 start;
+ __u16 end;
+};
+#endif
diff --git a/src/basic/missing_fou.h b/src/basic/missing_fou.h
new file mode 100644
index 0000000..d8c7435
--- /dev/null
+++ b/src/basic/missing_fou.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if !HAVE_LINUX_FOU_H /* linux@23461551c00628c3f3fe9cf837bf53cf8f212b63 (3.18) */
+
+#define FOU_GENL_NAME "fou"
+#define FOU_GENL_VERSION 0x1
+
+enum { /* attributes; values are implicit, starting at 0 */
+ FOU_ATTR_UNSPEC,
+ FOU_ATTR_PORT, /* u16 */
+ FOU_ATTR_AF, /* u8 */
+ FOU_ATTR_IPPROTO, /* u8 */
+ FOU_ATTR_TYPE, /* u8 */
+ FOU_ATTR_REMCSUM_NOPARTIAL, /* flag */
+
+ __FOU_ATTR_MAX, /* one past the last attribute */
+};
+
+#define FOU_ATTR_MAX (__FOU_ATTR_MAX - 1)
+
+enum { /* commands; values are implicit, starting at 0 */
+ FOU_CMD_UNSPEC,
+ FOU_CMD_ADD,
+ FOU_CMD_DEL,
+ FOU_CMD_GET,
+
+ __FOU_CMD_MAX, /* one past the last command */
+};
+
+enum { /* encapsulation types; values are implicit, starting at 0 */
+ FOU_ENCAP_UNSPEC,
+ FOU_ENCAP_DIRECT,
+ FOU_ENCAP_GUE,
+};
+
+#define FOU_CMD_MAX (__FOU_CMD_MAX - 1) /* belongs to the command enum, although it follows the encapsulation enum */
+
+#else
+
+#if !HAVE_FOU_ATTR_REMCSUM_NOPARTIAL /* linux@fe881ef11cf0220f118816181930494d484c4883 (4.0) */
+#define FOU_ATTR_REMCSUM_NOPARTIAL 5 /* same value the enum above assigns implicitly */
+
+#undef FOU_ATTR_MAX
+#define FOU_ATTR_MAX 5
+#endif
+
+#if !HAVE_FOU_CMD_GET /* linux@7a6c8c34e5b71ac50e39588e20b39494a9e1d8e5 (4.1) */
+#define FOU_CMD_GET 3 /* same value the enum above assigns implicitly */
+
+#undef FOU_CMD_MAX
+#define FOU_CMD_MAX 3
+#endif
+
+#endif
diff --git a/src/basic/missing_fs.h b/src/basic/missing_fs.h
new file mode 100644
index 0000000..48c1af0
--- /dev/null
+++ b/src/basic/missing_fs.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* linux/fs.h */
+#ifndef RENAME_NOREPLACE /* 0a7c3937a1f23f8cb5fc77ae01661e9968a51d0c (3.15) */
+#define RENAME_NOREPLACE (1 << 0)
+#endif
+
+/* linux/fs.h or sys/mount.h */
+#ifndef MS_MOVE
+#define MS_MOVE 8192 /* equals 1<<13 */
+#endif
+
+#ifndef MS_REC
+#define MS_REC 16384 /* equals 1<<14 */
+#endif
+
+#ifndef MS_PRIVATE
+#define MS_PRIVATE (1<<18)
+#endif
+
+#ifndef MS_SLAVE
+#define MS_SLAVE (1<<19)
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1<<20)
+#endif
+
+#ifndef MS_RELATIME
+#define MS_RELATIME (1<<21)
+#endif
+
+#ifndef MS_KERNMOUNT
+#define MS_KERNMOUNT (1<<22)
+#endif
+
+#ifndef MS_I_VERSION
+#define MS_I_VERSION (1<<23)
+#endif
+
+#ifndef MS_STRICTATIME
+#define MS_STRICTATIME (1<<24)
+#endif
+
+#ifndef MS_LAZYTIME
+#define MS_LAZYTIME (1<<25)
+#endif
+
+/* Not exposed yet. Defined at fs/ext4/ext4.h */
+#ifndef EXT4_IOC_RESIZE_FS /* NOTE(review): this header includes nothing itself, so _IOW() and __u64 must be provided by whoever includes it — confirm */
+#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
+#endif
+
+/* Not exposed yet. Defined at fs/cifs/cifsglob.h */
+#ifndef CIFS_MAGIC_NUMBER
+#define CIFS_MAGIC_NUMBER 0xFF534D42
+#endif
+
+/* linux/nsfs.h */
+#ifndef NS_GET_NSTYPE /* d95fa3c76a66b6d76b1e109ea505c55e66360f3c (4.11) */
+#define NS_GET_NSTYPE _IO(0xb7, 0x3)
+#endif
diff --git a/src/basic/missing_if_bridge.h b/src/basic/missing_if_bridge.h
new file mode 100644
index 0000000..9306062
--- /dev/null
+++ b/src/basic/missing_if_bridge.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if !HAVE_IFLA_BRIDGE_VLAN_TUNNEL_INFO /* linux@b3c7ef0adadc5768e0baa786213c6bd1ce521a77 (4.11) */
+#define IFLA_BRIDGE_VLAN_TUNNEL_INFO 3
+
+#undef IFLA_BRIDGE_MAX /* redefine the maximum to the attribute just added */
+#define IFLA_BRIDGE_MAX 3
+#endif
+
+#ifndef BRIDGE_VLAN_INFO_RANGE_BEGIN /* unlike the block above, the three flags below are guarded with #ifndef on the macro itself */
+#define BRIDGE_VLAN_INFO_RANGE_BEGIN (1<<3) /* VLAN is start of vlan range */
+#endif
+
+#ifndef BRIDGE_VLAN_INFO_RANGE_END
+#define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */
+#endif
+
+#ifndef BRIDGE_VLAN_INFO_BRENTRY
+#define BRIDGE_VLAN_INFO_BRENTRY (1<<5) /* Global bridge VLAN entry */
+#endif
diff --git a/src/basic/missing_if_link.h b/src/basic/missing_if_link.h
new file mode 100644
index 0000000..761797f
--- /dev/null
+++ b/src/basic/missing_if_link.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if !HAVE_IFLA_INET6_ADDR_GEN_MODE /* linux@bc91b0f07ada5535427373a4e2050877bcc12218 (3.17) */
+#define IFLA_INET6_ADDR_GEN_MODE 8
+
+#undef IFLA_INET6_MAX
+#define IFLA_INET6_MAX 8
+
+enum in6_addr_gen_mode {
+ IN6_ADDR_GEN_MODE_EUI64,
+ IN6_ADDR_GEN_MODE_NONE,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
+ IN6_ADDR_GEN_MODE_RANDOM,
+};
+#else
+#if !HAVE_IN6_ADDR_GEN_MODE_STABLE_PRIVACY /* linux@622c81d57b392cc9be836670eb464a4dfaa9adfe (4.1) */
+#define IN6_ADDR_GEN_MODE_STABLE_PRIVACY 2
+#endif
+#if !HAVE_IN6_ADDR_GEN_MODE_RANDOM /* linux@cc9da6cc4f56e05cc9e591459fe0192727ff58b3 (4.5) */
+#define IN6_ADDR_GEN_MODE_RANDOM 3
+#endif
+#endif /* !HAVE_IFLA_INET6_ADDR_GEN_MODE */
+
+#if !HAVE_IFLA_IPVLAN_MODE /* linux@2ad7bf3638411cb547f2823df08166c13ab04269 (3.19) */
+enum {
+ IFLA_IPVLAN_UNSPEC,
+ IFLA_IPVLAN_MODE,
+ IFLA_IPVLAN_FLAGS,
+ __IFLA_IPVLAN_MAX
+};
+#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1)
+enum ipvlan_mode {
+ IPVLAN_MODE_L2 = 0,
+ IPVLAN_MODE_L3,
+ IPVLAN_MODE_L3S,
+ IPVLAN_MODE_MAX
+};
+#else
+#if !HAVE_IPVLAN_MODE_L3S /* linux@4fbae7d83c98c30efcf0a2a2ac55fbb75ef5a1a5 (4.9) */
+#define IPVLAN_MODE_L3S 2
+#define IPVLAN_MODE_MAX 3
+#endif
+#if !HAVE_IFLA_IPVLAN_FLAGS /* linux@a190d04db93710ae166749055b6985397c6d13f5 (4.15) */
+#define IFLA_IPVLAN_FLAGS 2
+
+#undef IFLA_IPVLAN_MAX
+#define IFLA_IPVLAN_MAX 2
+#endif
+#endif /* !HAVE_IFLA_IPVLAN_MODE */
+
+/* linux@a190d04db93710ae166749055b6985397c6d13f5 (4.15) */
+#ifndef IPVLAN_F_PRIVATE
+#define IPVLAN_F_PRIVATE 0x01
+#endif
+
+/* linux@fe89aa6b250c1011ccf425fbb7998e96bd54263f (4.15) */
+#ifndef IPVLAN_F_VEPA
+#define IPVLAN_F_VEPA 0x02
+#endif
+
+#if !HAVE_IFLA_PHYS_PORT_ID /* linux@66cae9ed6bc46b8cc57a9693f99f69926f3cc7ef (3.12) */
+#define IFLA_PHYS_PORT_ID 34
+#endif
+#if !HAVE_IFLA_CARRIER_CHANGES /* linux@2d3b479df41a10e2f41f9259fcba775bd34de6e4 (3.15) */
+#define IFLA_CARRIER_CHANGES 35
+#endif
+#if !HAVE_IFLA_PHYS_SWITCH_ID /* linux@82f2841291cfaf4d225aa1766424280254d3e3b2 (3.19) */
+#define IFLA_PHYS_SWITCH_ID 36
+#endif
+#if !HAVE_IFLA_LINK_NETNSID /* linux@d37512a277dfb2cef8a578e25a3246f61399a55a (4.0) */
+#define IFLA_LINK_NETNSID 37
+#endif
+#if !HAVE_IFLA_PHYS_PORT_NAME /* linux@db24a9044ee191c397dcd1c6574f56d67d7c8df5 (4.1) */
+#define IFLA_PHYS_PORT_NAME 38
+#endif
+#if !HAVE_IFLA_PROTO_DOWN /* linux@88d6378bd6c096cb8440face3ae3f33d55a2e6e4 (4.3) */
+#define IFLA_PROTO_DOWN 39
+#endif
+#if !HAVE_IFLA_GSO_MAX_SIZE /* linux@c70ce028e834f8e51306217dbdbd441d851c64d3 (4.6) */
+#define IFLA_GSO_MAX_SEGS 40
+#define IFLA_GSO_MAX_SIZE 41
+#endif
+#if !HAVE_IFLA_PAD /* linux@18402843bf88c2e9674e1a3a05c73b7d9b09ee05 (4.7) */
+#define IFLA_PAD 42
+#endif
+#if !HAVE_IFLA_XDP /* linux@d1fdd9138682e0f272beee0cb08b6328c5478b26 (4.8) */
+#define IFLA_XDP 43
+#endif
+#if !HAVE_IFLA_EVENT /* linux@3d3ea5af5c0b382bc9d9aed378fd814fb5d4a011 (4.13) */
+#define IFLA_EVENT 44
+#endif
+#if !HAVE_IFLA_IF_NETNSID /* linux@6621dd29eb9b5e6774ec7a9a75161352fdea47fc, 79e1ad148c844f5c8b9d76b36b26e3886dca95ae (4.15) */
+#define IFLA_NEW_NETNSID 45 /* kernel enum order is IFLA_EVENT (44), IFLA_NEW_NETNSID, IFLA_IF_NETNSID */
+#define IFLA_IF_NETNSID 46 /* also the value IFLA_TARGET_NETNSID aliases */
+#endif
+#if !HAVE_IFLA_TARGET_NETNSID /* linux@19d8f1ad12fd746e60707a58d954980013c7a35a (4.20) */
+#define IFLA_TARGET_NETNSID IFLA_IF_NETNSID
+#endif
+#if !HAVE_IFLA_NEW_IFINDEX /* linux@b2d3bcfa26a7a8de41f358a6cae8b848673b3c6e, 38e01b30563a5b5ade7b54e5d739d16a2b02fe82 (4.16) */
+#define IFLA_CARRIER_UP_COUNT 47
+#define IFLA_CARRIER_DOWN_COUNT 48
+#define IFLA_NEW_IFINDEX 49
+#endif
+#if !HAVE_IFLA_MAX_MTU /* linux@3e7a50ceb11ea75c27e944f1a01e478fd62a2d8d (4.19) */
+#define IFLA_MIN_MTU 50
+#define IFLA_MAX_MTU 51
+
+#undef IFLA_MAX
+#define IFLA_MAX 51
+#endif
+
+#if !HAVE_IFLA_BOND_MODE /* linux@90af231106c0b8d223c27d35464af95cb3d9cacf (3.13) */
+#define IFLA_BOND_MODE 1
+#endif
+#if !HAVE_IFLA_BOND_ACTIVE_SLAVE /* linux@ec76aa49855f6d6fea5e01de179fb57dd47c619d (3.13) */
+#define IFLA_BOND_ACTIVE_SLAVE 2
+#endif
+#if !HAVE_IFLA_BOND_AD_INFO /* linux@4ee7ac7526d4a9413cafa733d824edfe49fdcc46 (3.14) */
+#define IFLA_BOND_MIIMON 3
+#define IFLA_BOND_UPDELAY 4
+#define IFLA_BOND_DOWNDELAY 5
+#define IFLA_BOND_USE_CARRIER 6
+#define IFLA_BOND_ARP_INTERVAL 7
+#define IFLA_BOND_ARP_IP_TARGET 8
+#define IFLA_BOND_ARP_VALIDATE 9
+#define IFLA_BOND_ARP_ALL_TARGETS 10
+#define IFLA_BOND_PRIMARY 11
+#define IFLA_BOND_PRIMARY_RESELECT 12
+#define IFLA_BOND_FAIL_OVER_MAC 13
+#define IFLA_BOND_XMIT_HASH_POLICY 14
+#define IFLA_BOND_RESEND_IGMP 15
+#define IFLA_BOND_NUM_PEER_NOTIF 16
+#define IFLA_BOND_ALL_SLAVES_ACTIVE 17
+#define IFLA_BOND_MIN_LINKS 18
+#define IFLA_BOND_LP_INTERVAL 19
+#define IFLA_BOND_PACKETS_PER_SLAVE 20
+#define IFLA_BOND_AD_LACP_RATE 21
+#define IFLA_BOND_AD_SELECT 22
+#define IFLA_BOND_AD_INFO 23
+#endif
+#if !HAVE_IFLA_BOND_AD_ACTOR_SYSTEM /* linux@171a42c38c6e1a5a076d6276e94e55a0b5b7868c (4.2) */
+#define IFLA_BOND_AD_ACTOR_SYS_PRIO 24
+#define IFLA_BOND_AD_USER_PORT_KEY 25
+#define IFLA_BOND_AD_ACTOR_SYSTEM 26
+#endif
+#if !HAVE_IFLA_BOND_TLB_DYNAMIC_LB /* linux@0f7bffd9e512b77279bbce704fad3cb1d6887958 (4.3) */
+#define IFLA_BOND_TLB_DYNAMIC_LB 27
+
+#undef IFLA_BOND_MAX
+#define IFLA_BOND_MAX 27
+#endif
+
+#if !HAVE_IFLA_VXLAN_UDP_ZERO_CSUM6_RX /* linux@359a0ea9875ef4f32c8425bbe1ae348e1fd2ed2a (3.16) */
+#define IFLA_VXLAN_UDP_CSUM 18
+#define IFLA_VXLAN_UDP_ZERO_CSUM6_TX 19
+#define IFLA_VXLAN_UDP_ZERO_CSUM6_RX 20
+#endif
+#if !HAVE_IFLA_VXLAN_REMCSUM_NOPARTIAL /* linux@dfd8645ea1bd91277f841e74c33e1f4dbbede808..0ace2ca89cbd6bcdf2b9d2df1fa0fa24ea9d1653 (4.0) */
+#define IFLA_VXLAN_REMCSUM_TX 21
+#define IFLA_VXLAN_REMCSUM_RX 22
+#define IFLA_VXLAN_GBP 23
+#define IFLA_VXLAN_REMCSUM_NOPARTIAL 24
+#endif
+#if !HAVE_IFLA_VXLAN_COLLECT_METADATA /* linux@f8a9b1bc1b238eed9987da747a0e52f5bb009980 (4.3) */
+#define IFLA_VXLAN_COLLECT_METADATA 25
+#endif
+#if !HAVE_IFLA_VXLAN_LABEL /* linux@e7f70af111f086a20800ad2e17f544b2e3e0f375 (4.6) */
+#define IFLA_VXLAN_LABEL 26
+#endif
+#if !HAVE_IFLA_VXLAN_GPE /* linux@e1e5314de08ba6003b358125eafc9ad9e75a950c (4.7) */
+#define IFLA_VXLAN_GPE 27
+#endif
+#if !HAVE_IFLA_VXLAN_TTL_INHERIT /* linux@72f6d71e491e6ce269b564865b21fab0a4402dd3 (4.18) */
+#define IFLA_VXLAN_TTL_INHERIT 28
+
+#undef IFLA_VXLAN_MAX
+#define IFLA_VXLAN_MAX 28
+#endif
+
+#if !HAVE_IFLA_GENEVE_TOS /* linux@2d07dc79fe04a43d82a346ced6bbf07bdb523f1b..d89511251f6519599b109dc6cda87a6ab314ed8c (4.2) */
+enum {
+ IFLA_GENEVE_UNSPEC,
+ IFLA_GENEVE_ID,
+ IFLA_GENEVE_REMOTE,
+ IFLA_GENEVE_TTL,
+ IFLA_GENEVE_TOS,
+ IFLA_GENEVE_PORT, /* destination port */
+ IFLA_GENEVE_COLLECT_METADATA,
+ IFLA_GENEVE_REMOTE6,
+ IFLA_GENEVE_UDP_CSUM,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
+ IFLA_GENEVE_LABEL,
+ IFLA_GENEVE_TTL_INHERIT,
+ __IFLA_GENEVE_MAX
+};
+#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
+#else
+#if !HAVE_IFLA_GENEVE_COLLECT_METADATA /* linux@e305ac6cf5a1e1386aedce7ef9cb773635d5845c (4.3) */
+#define IFLA_GENEVE_PORT 5
+#define IFLA_GENEVE_COLLECT_METADATA 6
+#endif
+#if !HAVE_IFLA_GENEVE_REMOTE6 /* linux@8ed66f0e8235118a31720acdab3bbbe9debd0f6a (4.4) */
+#define IFLA_GENEVE_REMOTE6 7
+#endif
+#if !HAVE_IFLA_GENEVE_UDP_ZERO_CSUM6_RX /* linux@abe492b4f50c3ae2ebcfaa2f5c16176aebaa1c68 (4.5) */
+#define IFLA_GENEVE_UDP_CSUM 8
+#define IFLA_GENEVE_UDP_ZERO_CSUM6_TX 9
+#define IFLA_GENEVE_UDP_ZERO_CSUM6_RX 10
+#endif
+#if !HAVE_IFLA_GENEVE_LABEL /* linux@8eb3b99554b82da968d1fbc00df9f3156c5e2d63 (4.6) */
+#define IFLA_GENEVE_LABEL 11
+#endif
+#if !HAVE_IFLA_GENEVE_TTL_INHERIT /* linux@52d0d404d39dd9eac71a181615d6ca15e23d8e38 (4.20) */
+#define IFLA_GENEVE_TTL_INHERIT 12
+
+#undef IFLA_GENEVE_MAX
+#define IFLA_GENEVE_MAX 12
+#endif
+#endif
+
+#if !HAVE_IFLA_BR_MAX_AGE /* linux@e5c3ea5c668033b303e7ac835d7d91da32d97958 (3.18) */
+enum {
+ IFLA_BR_UNSPEC,
+ IFLA_BR_FORWARD_DELAY,
+ IFLA_BR_HELLO_TIME,
+ IFLA_BR_MAX_AGE,
+ IFLA_BR_AGEING_TIME,
+ IFLA_BR_STP_STATE,
+ IFLA_BR_PRIORITY,
+ IFLA_BR_VLAN_FILTERING,
+ IFLA_BR_VLAN_PROTOCOL,
+ IFLA_BR_GROUP_FWD_MASK,
+ IFLA_BR_ROOT_ID,
+ IFLA_BR_BRIDGE_ID,
+ IFLA_BR_ROOT_PORT,
+ IFLA_BR_ROOT_PATH_COST,
+ IFLA_BR_TOPOLOGY_CHANGE,
+ IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
+ IFLA_BR_HELLO_TIMER,
+ IFLA_BR_TCN_TIMER,
+ IFLA_BR_TOPOLOGY_CHANGE_TIMER,
+ IFLA_BR_GC_TIMER,
+ IFLA_BR_GROUP_ADDR,
+ IFLA_BR_FDB_FLUSH,
+ IFLA_BR_MCAST_ROUTER,
+ IFLA_BR_MCAST_SNOOPING,
+ IFLA_BR_MCAST_QUERY_USE_IFADDR,
+ IFLA_BR_MCAST_QUERIER,
+ IFLA_BR_MCAST_HASH_ELASTICITY,
+ IFLA_BR_MCAST_HASH_MAX,
+ IFLA_BR_MCAST_LAST_MEMBER_CNT,
+ IFLA_BR_MCAST_STARTUP_QUERY_CNT,
+ IFLA_BR_MCAST_LAST_MEMBER_INTVL,
+ IFLA_BR_MCAST_MEMBERSHIP_INTVL,
+ IFLA_BR_MCAST_QUERIER_INTVL,
+ IFLA_BR_MCAST_QUERY_INTVL,
+ IFLA_BR_MCAST_QUERY_RESPONSE_INTVL,
+ IFLA_BR_MCAST_STARTUP_QUERY_INTVL,
+ IFLA_BR_NF_CALL_IPTABLES,
+ IFLA_BR_NF_CALL_IP6TABLES,
+ IFLA_BR_NF_CALL_ARPTABLES,
+ IFLA_BR_VLAN_DEFAULT_PVID,
+ IFLA_BR_PAD,
+ IFLA_BR_VLAN_STATS_ENABLED,
+ IFLA_BR_MCAST_STATS_ENABLED,
+ IFLA_BR_MCAST_IGMP_VERSION,
+ IFLA_BR_MCAST_MLD_VERSION,
+ IFLA_BR_VLAN_STATS_PER_PORT,
+ __IFLA_BR_MAX,
+};
+
+#define IFLA_BR_MAX (__IFLA_BR_MAX - 1)
+#else
+#if !HAVE_IFLA_BR_PRIORITY /* linux@af615762e972be0c66cf1d156ca4fac13b93c0b0 (4.1) */
+#define IFLA_BR_AGEING_TIME 4
+#define IFLA_BR_STP_STATE 5
+#define IFLA_BR_PRIORITY 6
+#endif
+#if !HAVE_IFLA_BR_VLAN_PROTOCOL /* linux@a7854037da006a7472c48773e3190db55217ec9b, d2d427b3927bd7a0348fc7f323d0e291f79a2779 (4.3) */
+#define IFLA_BR_VLAN_FILTERING 7
+#define IFLA_BR_VLAN_PROTOCOL 8
+#endif
+#if !HAVE_IFLA_BR_VLAN_DEFAULT_PVID /* linux@7910228b6bb35f3c8e0bc72a8d84c29616cb1b90..0f963b7592ef9e054974b6672b86ec1edd84b4bc (4.4) */
+#define IFLA_BR_GROUP_FWD_MASK 9
+#define IFLA_BR_ROOT_ID 10
+#define IFLA_BR_BRIDGE_ID 11
+#define IFLA_BR_ROOT_PORT 12
+#define IFLA_BR_ROOT_PATH_COST 13
+#define IFLA_BR_TOPOLOGY_CHANGE 14
+#define IFLA_BR_TOPOLOGY_CHANGE_DETECTED 15
+#define IFLA_BR_HELLO_TIMER 16
+#define IFLA_BR_TCN_TIMER 17
+#define IFLA_BR_TOPOLOGY_CHANGE_TIMER 18
+#define IFLA_BR_GC_TIMER 19
+#define IFLA_BR_GROUP_ADDR 20
+#define IFLA_BR_FDB_FLUSH 21
+#define IFLA_BR_MCAST_ROUTER 22
+#define IFLA_BR_MCAST_SNOOPING 23
+#define IFLA_BR_MCAST_QUERY_USE_IFADDR 24
+#define IFLA_BR_MCAST_QUERIER 25
+#define IFLA_BR_MCAST_HASH_ELASTICITY 26
+#define IFLA_BR_MCAST_HASH_MAX 27
+#define IFLA_BR_MCAST_LAST_MEMBER_CNT 28
+#define IFLA_BR_MCAST_STARTUP_QUERY_CNT 29
+#define IFLA_BR_MCAST_LAST_MEMBER_INTVL 30
+#define IFLA_BR_MCAST_MEMBERSHIP_INTVL 31
+#define IFLA_BR_MCAST_QUERIER_INTVL 32
+#define IFLA_BR_MCAST_QUERY_INTVL 33
+#define IFLA_BR_MCAST_QUERY_RESPONSE_INTVL 34
+#define IFLA_BR_MCAST_STARTUP_QUERY_INTVL 35
+#define IFLA_BR_NF_CALL_IPTABLES 36
+#define IFLA_BR_NF_CALL_IP6TABLES 37
+#define IFLA_BR_NF_CALL_ARPTABLES 38
+#define IFLA_BR_VLAN_DEFAULT_PVID 39
+#endif
+#if !HAVE_IFLA_BR_VLAN_STATS_ENABLED /* linux@12a0faa3bd76157b9dc096758d6818ff535e4586, 6dada9b10a0818ba72c249526a742c8c41274a73 (4.7) */
+#define IFLA_BR_PAD 40
+#define IFLA_BR_VLAN_STATS_ENABLED 41
+#endif
+#if !HAVE_IFLA_BR_MCAST_STATS_ENABLED /* linux@1080ab95e3c7bdd77870e209aff83c763fdcf439 (4.8) */
+#define IFLA_BR_MCAST_STATS_ENABLED 42
+#endif
+#if !HAVE_IFLA_BR_MCAST_MLD_VERSION /* linux@5e9235853d652a295d5f56cb8652950b6b5bf56b, aa2ae3e71c74cc00ec22f133dc900b3817415785 (4.10) */
+#define IFLA_BR_MCAST_IGMP_VERSION 43
+#define IFLA_BR_MCAST_MLD_VERSION 44
+#endif
+#if !HAVE_IFLA_BR_VLAN_STATS_PER_PORT /* linux@9163a0fc1f0c0980f117cc25f4fa6ba9b0750a36 (4.20) */
+#define IFLA_BR_VLAN_STATS_PER_PORT 45
+
+#undef IFLA_BR_MAX
+#define IFLA_BR_MAX 45
+#endif
+#endif
+
+#if !HAVE_IFLA_BRPORT_LEARNING_SYNC /* linux@958501163ddd6ea22a98f94fa0e7ce6d4734e5c4, efacacdaf7cb5a0592ed772e3731636b2742e34a (3.19) */
+#define IFLA_BRPORT_PROXYARP 10
+#define IFLA_BRPORT_LEARNING_SYNC 11
+#endif
+#if !HAVE_IFLA_BRPORT_PROXYARP_WIFI /* linux@842a9ae08a25671db3d4f689eed68b4d64be15b5 (4.1) */
+#define IFLA_BRPORT_PROXYARP_WIFI 12
+#endif
+#if !HAVE_IFLA_BRPORT_MULTICAST_ROUTER /* linux@4ebc7660ab4559cad10b6595e05f70562bb26dc5..5d6ae479ab7ddf77bb22bdf739268581453ff886 (4.4) */
+#define IFLA_BRPORT_ROOT_ID 13
+#define IFLA_BRPORT_BRIDGE_ID 14
+#define IFLA_BRPORT_DESIGNATED_PORT 15
+#define IFLA_BRPORT_DESIGNATED_COST 16
+#define IFLA_BRPORT_ID 17
+#define IFLA_BRPORT_NO 18
+#define IFLA_BRPORT_TOPOLOGY_CHANGE_ACK 19
+#define IFLA_BRPORT_CONFIG_PENDING 20
+#define IFLA_BRPORT_MESSAGE_AGE_TIMER 21
+#define IFLA_BRPORT_FORWARD_DELAY_TIMER 22
+#define IFLA_BRPORT_HOLD_TIMER 23
+#define IFLA_BRPORT_FLUSH 24
+#define IFLA_BRPORT_MULTICAST_ROUTER 25
+#endif
+#if !HAVE_IFLA_BRPORT_PAD /* linux@12a0faa3bd76157b9dc096758d6818ff535e4586 (4.7) */
+#define IFLA_BRPORT_PAD 26
+#endif
+#if !HAVE_IFLA_BRPORT_MCAST_FLOOD /* linux@b6cb5ac8331b6bcfe9ce38c7f7f58db6e1d6270a (4.9) */
+#define IFLA_BRPORT_MCAST_FLOOD 27
+#endif
+#if !HAVE_IFLA_BRPORT_VLAN_TUNNEL /* linux@6db6f0eae6052b70885562e1733896647ec1d807, b3c7ef0adadc5768e0baa786213c6bd1ce521a77 (4.11) */
+#define IFLA_BRPORT_MCAST_TO_UCAST 28
+#define IFLA_BRPORT_VLAN_TUNNEL 29
+#endif
+#if !HAVE_IFLA_BRPORT_BCAST_FLOOD /* linux@99f906e9ad7b6e79ffeda30f45906a8448b9d6a2 (4.12) */
+#define IFLA_BRPORT_BCAST_FLOOD 30
+#endif
+#if !HAVE_IFLA_BRPORT_NEIGH_SUPPRESS /* linux@5af48b59f35cf712793badabe1a574a0d0ce3bd3, 821f1b21cabb46827ce39ddf82e2789680b5042a (4.15) */
+#define IFLA_BRPORT_GROUP_FWD_MASK 31
+#define IFLA_BRPORT_NEIGH_SUPPRESS 32
+#endif
+#if !HAVE_IFLA_BRPORT_ISOLATED /* linux@7d850abd5f4edb1b1ca4b4141a4453305736f564 (4.18) */
+#define IFLA_BRPORT_ISOLATED 33
+#endif
+#if !HAVE_IFLA_BRPORT_BACKUP_PORT /* linux@2756f68c314917d03eb348084edb08bb929139d9 (4.19) */
+#define IFLA_BRPORT_BACKUP_PORT 34
+
+#undef IFLA_BRPORT_MAX
+#define IFLA_BRPORT_MAX 34
+#endif
+
+#if !HAVE_IFLA_VRF_TABLE /* linux@4e3c89920cd3a6cfce22c6f537690747c26128dd (4.3) */
+enum {
+ IFLA_VRF_UNSPEC,
+ IFLA_VRF_TABLE,
+ __IFLA_VRF_MAX
+};
+#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1)
+#endif
diff --git a/src/basic/missing_if_tunnel.h b/src/basic/missing_if_tunnel.h
new file mode 100644
index 0000000..f51fdd1
--- /dev/null
+++ b/src/basic/missing_if_tunnel.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if !HAVE_IFLA_VTI_FWMARK /* linux@0a473b82cb23e7a35c4be6e9765c8487a65e8f55 (4.12) */
+#define IFLA_VTI_FWMARK 6
+
+#undef IFLA_VTI_MAX
+#define IFLA_VTI_MAX 6
+#endif
+
+#if !HAVE_IFLA_IPTUN_ENCAP_DPORT /* linux@56328486539ddd07cbaafec7a542a2c8a3043623 (3.18) */
+#define IFLA_IPTUN_ENCAP_TYPE 15
+#define IFLA_IPTUN_ENCAP_FLAGS 16
+#define IFLA_IPTUN_ENCAP_SPORT 17
+#define IFLA_IPTUN_ENCAP_DPORT 18
+#endif
+
+#if !HAVE_IFLA_IPTUN_COLLECT_METADATA /* linux@cfc7381b3002756b1dcada32979e942aa3126e31 (4.9) */
+#define IFLA_IPTUN_COLLECT_METADATA 19
+#endif
+
+#if !HAVE_IFLA_IPTUN_FWMARK /* linux@0a473b82cb23e7a35c4be6e9765c8487a65e8f55 (4.12) */
+#define IFLA_IPTUN_FWMARK 20
+
+#undef IFLA_IPTUN_MAX
+#define IFLA_IPTUN_MAX 20
+#endif
+
+#if !HAVE_IFLA_GRE_ENCAP_DPORT /* linux@4565e9919cda747815547e2e5d7b78f15efbffdf (3.18) */
+#define IFLA_GRE_ENCAP_TYPE 14
+#define IFLA_GRE_ENCAP_FLAGS 15
+#define IFLA_GRE_ENCAP_SPORT 16
+#define IFLA_GRE_ENCAP_DPORT 17
+#endif
+
+#if !HAVE_IFLA_GRE_COLLECT_METADATA /* linux@2e15ea390e6f4466655066d97e22ec66870a042c (4.3) */
+#define IFLA_GRE_COLLECT_METADATA 18
+#endif
+
+#if !HAVE_IFLA_GRE_IGNORE_DF /* linux@22a59be8b7693eb2d0897a9638f5991f2f8e4ddd (4.8) */
+#define IFLA_GRE_IGNORE_DF 19
+#endif
+
+#if !HAVE_IFLA_GRE_FWMARK /* linux@0a473b82cb23e7a35c4be6e9765c8487a65e8f55 (4.12) */
+#define IFLA_GRE_FWMARK 20
+#endif
+
+#if !HAVE_IFLA_GRE_ERSPAN_INDEX /* linux@84e54fe0a5eaed696dee4019c396f8396f5a908b (4.14) */
+#define IFLA_GRE_ERSPAN_INDEX 21
+#endif
+
+#if !HAVE_IFLA_GRE_ERSPAN_HWID /* linux@f551c91de262ba36b20c3ac19538afb4f4507441 (4.16) */
+#define IFLA_GRE_ERSPAN_VER 22
+#define IFLA_GRE_ERSPAN_DIR 23
+#define IFLA_GRE_ERSPAN_HWID 24
+
+#undef IFLA_GRE_MAX
+#define IFLA_GRE_MAX 24
+#endif
diff --git a/src/basic/missing_input.h b/src/basic/missing_input.h
new file mode 100644
index 0000000..b91ccb6
--- /dev/null
+++ b/src/basic/missing_input.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/input.h>
+#include <linux/types.h>
+
+/* linux@c7dc65737c9a607d3e6f8478659876074ad129b8 (3.12) */
+#ifndef EVIOCREVOKE
+#define EVIOCREVOKE _IOW('E', 0x91, int)
+#endif
+
+/* linux@06a16293f71927f756dcf37558a79c0b05a91641 (4.4) */
+#ifndef EVIOCSMASK /* one guard covers the struct and both ioctl codes: they are defined together or not at all */
+struct input_mask {
+ __u32 type;
+ __u32 codes_size;
+ __u64 codes_ptr; /* NOTE(review): presumably a userspace pointer carried as a 64-bit integer -- confirm against linux/input.h */
+};
+
+#define EVIOCGMASK _IOR('E', 0x92, struct input_mask) /* read-direction request code */
+#define EVIOCSMASK _IOW('E', 0x93, struct input_mask) /* write-direction request code */
+#endif
+
+/* linux@7611392fe8ff95ecae528b01a815ae3d72ca6b95 (3.17) */
+#ifndef INPUT_PROP_POINTING_STICK
+#define INPUT_PROP_POINTING_STICK 0x05
+#endif
+
+/* linux@500d4160abe9a2e88b12e319c13ae3ebd1e18108 (4.0) */
+#ifndef INPUT_PROP_ACCELEROMETER
+#define INPUT_PROP_ACCELEROMETER 0x06
+#endif
+
+/* linux@d09bbfd2a8408a995419dff0d2ba906013cf4cc9 (3.11) */
+#ifndef BTN_DPAD_UP
+#define BTN_DPAD_UP 0x220
+#define BTN_DPAD_DOWN 0x221
+#define BTN_DPAD_LEFT 0x222
+#define BTN_DPAD_RIGHT 0x223
+#endif
+
+/* linux@358f24704f2f016af7d504b357cdf32606091d07 (3.13) */
+#ifndef KEY_ALS_TOGGLE
+#define KEY_ALS_TOGGLE 0x230
+#endif
diff --git a/src/basic/missing_keyctl.h b/src/basic/missing_keyctl.h
new file mode 100644
index 0000000..7eb7095
--- /dev/null
+++ b/src/basic/missing_keyctl.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+#include <inttypes.h> /* int32_t, needed by the key_serial_t fallback typedef in this header */
+#include <linux/keyctl.h>
+
+#ifndef KEYCTL_JOIN_SESSION_KEYRING
+#define KEYCTL_JOIN_SESSION_KEYRING 1
+#endif
+
+#ifndef KEYCTL_CHOWN
+#define KEYCTL_CHOWN 4
+#endif
+
+#ifndef KEYCTL_SETPERM
+#define KEYCTL_SETPERM 5
+#endif
+
+#ifndef KEYCTL_DESCRIBE
+#define KEYCTL_DESCRIBE 6
+#endif
+
+#ifndef KEYCTL_LINK
+#define KEYCTL_LINK 8
+#endif
+
+#ifndef KEYCTL_READ
+#define KEYCTL_READ 11
+#endif
+
+#ifndef KEYCTL_SET_TIMEOUT
+#define KEYCTL_SET_TIMEOUT 15
+#endif
+
+#ifndef KEY_SPEC_USER_KEYRING
+#define KEY_SPEC_USER_KEYRING -4
+#endif
+
+#ifndef KEY_SPEC_SESSION_KEYRING
+#define KEY_SPEC_SESSION_KEYRING -3
+#endif
+
+/* From linux/key.h */
+#ifndef KEY_POS_VIEW
+
+typedef int32_t key_serial_t;
+
+#define KEY_POS_VIEW 0x01000000
+#define KEY_POS_READ 0x02000000
+#define KEY_POS_WRITE 0x04000000
+#define KEY_POS_SEARCH 0x08000000
+#define KEY_POS_LINK 0x10000000
+#define KEY_POS_SETATTR 0x20000000
+#define KEY_POS_ALL 0x3f000000
+
+#define KEY_USR_VIEW 0x00010000
+#define KEY_USR_READ 0x00020000
+#define KEY_USR_WRITE 0x00040000
+#define KEY_USR_SEARCH 0x00080000
+#define KEY_USR_LINK 0x00100000
+#define KEY_USR_SETATTR 0x00200000
+#define KEY_USR_ALL 0x003f0000
+
+#define KEY_GRP_VIEW 0x00000100
+#define KEY_GRP_READ 0x00000200
+#define KEY_GRP_WRITE 0x00000400
+#define KEY_GRP_SEARCH 0x00000800
+#define KEY_GRP_LINK 0x00001000
+#define KEY_GRP_SETATTR 0x00002000
+#define KEY_GRP_ALL 0x00003f00
+
+#define KEY_OTH_VIEW 0x00000001
+#define KEY_OTH_READ 0x00000002
+#define KEY_OTH_WRITE 0x00000004
+#define KEY_OTH_SEARCH 0x00000008
+#define KEY_OTH_LINK 0x00000010
+#define KEY_OTH_SETATTR 0x00000020
+#define KEY_OTH_ALL 0x0000003f
+#endif
diff --git a/src/basic/missing_magic.h b/src/basic/missing_magic.h
new file mode 100644
index 0000000..4910cd3
--- /dev/null
+++ b/src/basic/missing_magic.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/magic.h>
+
+/* 62aa81d7c4c24b90fdb61da70ac0dbbc414f9939 (4.13) */
+#ifndef OCFS2_SUPER_MAGIC
+#define OCFS2_SUPER_MAGIC 0x7461636f
+#endif
+
+/* 67e9c74b8a873408c27ac9a8e4c1d1c8d72c93ff (4.5) */
+#ifndef CGROUP2_SUPER_MAGIC
+#define CGROUP2_SUPER_MAGIC 0x63677270
+#endif
+
+/* 4282d60689d4f21b40692029080440cc58e8a17d (4.1) */
+#ifndef TRACEFS_MAGIC
+#define TRACEFS_MAGIC 0x74726163
+#endif
+
+/* e149ed2b805fefdccf7ccdfc19eca22fdd4514ac (3.19) */
+#ifndef NSFS_MAGIC
+#define NSFS_MAGIC 0x6e736673
+#endif
+
+/* b2197755b2633e164a439682fb05a9b5ea48f706 (4.4) */
+#ifndef BPF_FS_MAGIC
+#define BPF_FS_MAGIC 0xcafe4a11
+#endif
+
+/* Not exposed yet (4.20). Defined at ipc/mqueue.c */
+#ifndef MQUEUE_MAGIC
+#define MQUEUE_MAGIC 0x19800202
+#endif
diff --git a/src/basic/missing_mman.h b/src/basic/missing_mman.h
new file mode 100644
index 0000000..7ff12f7
--- /dev/null
+++ b/src/basic/missing_mman.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/mman.h>
+
+#ifndef MFD_ALLOW_SEALING
+#define MFD_ALLOW_SEALING 0x0002U
+#endif
+
+#ifndef MFD_CLOEXEC
+#define MFD_CLOEXEC 0x0001U
+#endif
diff --git a/src/basic/missing_network.h b/src/basic/missing_network.h
new file mode 100644
index 0000000..59a8cd2
--- /dev/null
+++ b/src/basic/missing_network.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/loop.h>
+#include <linux/rtnetlink.h>
+#include <net/ethernet.h>
+
+#include "missing_ethtool.h"
+#include "missing_fib_rules.h"
+#include "missing_fou.h"
+#include "missing_if_bridge.h"
+#include "missing_if_link.h"
+#include "missing_if_tunnel.h"
+#include "missing_vxcan.h"
+
+/* if.h */
+/* The following two defines have actually been available in the kernel headers for a long time, but we define them
+ * here anyway, since that makes it easier to use them in conjunction with the glibc net/if.h header which conflicts
+ * with linux/if.h. */
+#ifndef IF_OPER_UNKNOWN
+#define IF_OPER_UNKNOWN 0
+#endif
+
+#ifndef IF_OPER_UP
+#define IF_OPER_UP 6
+#endif
+
+#ifndef IFF_LOWER_UP
+#define IFF_LOWER_UP 0x10000
+#endif
+
+#ifndef IFF_DORMANT
+#define IFF_DORMANT 0x20000
+#endif
+
+/* if_addr.h */
+#if !HAVE_IFA_FLAGS
+#define IFA_FLAGS 8
+#endif
+
+#ifndef IFA_F_MANAGETEMPADDR
+#define IFA_F_MANAGETEMPADDR 0x100
+#endif
+
+#ifndef IFA_F_NOPREFIXROUTE
+#define IFA_F_NOPREFIXROUTE 0x200
+#endif
+
+#ifndef IFA_F_MCAUTOJOIN
+#define IFA_F_MCAUTOJOIN 0x400
+#endif
+
+/* if_arp.h */
+#ifndef ARPHRD_IP6GRE
+#define ARPHRD_IP6GRE 823
+#endif
+
+/* if_bonding.h */
+#ifndef BOND_XMIT_POLICY_ENCAP23
+#define BOND_XMIT_POLICY_ENCAP23 3
+#endif
+
+#ifndef BOND_XMIT_POLICY_ENCAP34
+#define BOND_XMIT_POLICY_ENCAP34 4
+#endif
+
+/* if_tun.h */
+#ifndef IFF_MULTI_QUEUE
+#define IFF_MULTI_QUEUE 0x100
+#endif
+
+/* in6.h */
+#ifndef IPV6_UNICAST_IF
+#define IPV6_UNICAST_IF 76
+#endif
+
+/* ip.h */
+#ifndef IPV4_MIN_MTU
+#define IPV4_MIN_MTU 68
+#endif
+
+/* ipv6.h */
+#ifndef IPV6_MIN_MTU
+#define IPV6_MIN_MTU 1280
+#endif
+
+/* loop.h */
+#if !HAVE_LO_FLAGS_PARTSCAN
+#define LO_FLAGS_PARTSCAN 8
+#endif
+
+#ifndef LOOP_CTL_REMOVE
+#define LOOP_CTL_REMOVE 0x4C81
+#endif
+
+#ifndef LOOP_CTL_GET_FREE
+#define LOOP_CTL_GET_FREE 0x4C82
+#endif
+
+/* netdevice.h */
+#ifndef NET_ADDR_RANDOM
+#define NET_ADDR_RANDOM 1
+#endif
+
+#ifndef NET_NAME_UNKNOWN
+#define NET_NAME_UNKNOWN 0
+#endif
+
+#ifndef NET_NAME_ENUM
+#define NET_NAME_ENUM 1
+#endif
+
+#ifndef NET_NAME_PREDICTABLE
+#define NET_NAME_PREDICTABLE 2
+#endif
+
+#ifndef NET_NAME_USER
+#define NET_NAME_USER 3
+#endif
+
+#ifndef NET_NAME_RENAMED
+#define NET_NAME_RENAMED 4
+#endif
+
+/* netlink.h */
+#ifndef NETLINK_LIST_MEMBERSHIPS /* b42be38b2778eda2237fc759e55e3b698b05b315 (4.2) */
+#define NETLINK_LIST_MEMBERSHIPS 9
+#endif
+
+/* rtnetlink.h */
+#ifndef RTA_PREF
+#define RTA_PREF 20
+#endif
+
+#ifndef RTAX_QUICKACK
+#define RTAX_QUICKACK 15
+#endif
+
+#ifndef RTA_EXPIRES
+#define RTA_EXPIRES 23
+#endif
+
+/* Note that LOOPBACK_IFINDEX is currently not exported by the
+ * kernel/glibc, but hardcoded internally by the kernel. However, as
+ * it is exported to userspace indirectly via rtnetlink and the
+ * ioctls, and made use of widely we define it here too, in a way that
+ * is compatible with the kernel's internal definition. */
+#ifndef LOOPBACK_IFINDEX
+#define LOOPBACK_IFINDEX 1
+#endif
+
+/* Not exposed yet. Similar values are defined in net/ethernet.h */
+#ifndef ETHERTYPE_LLDP
+#define ETHERTYPE_LLDP 0x88cc
+#endif
diff --git a/src/basic/missing_prctl.h b/src/basic/missing_prctl.h
new file mode 100644
index 0000000..f80cd17
--- /dev/null
+++ b/src/basic/missing_prctl.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/prctl.h>
+
+/* 58319057b7847667f0c9585b9de0e8932b0fdb08 (4.3) */
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT 47
+
+#define PR_CAP_AMBIENT_IS_SET 1
+#define PR_CAP_AMBIENT_RAISE 2
+#define PR_CAP_AMBIENT_LOWER 3
+#define PR_CAP_AMBIENT_CLEAR_ALL 4
+#endif
diff --git a/src/basic/missing_random.h b/src/basic/missing_random.h
new file mode 100644
index 0000000..2e76031
--- /dev/null
+++ b/src/basic/missing_random.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if USE_SYS_RANDOM_H
+# include <sys/random.h>
+#else
+# include <linux/random.h>
+#endif
+
+#ifndef GRND_NONBLOCK
+#define GRND_NONBLOCK 0x0001
+#endif
+
+#ifndef GRND_RANDOM
+#define GRND_RANDOM 0x0002
+#endif
diff --git a/src/basic/missing_resource.h b/src/basic/missing_resource.h
new file mode 100644
index 0000000..22ba8ab
--- /dev/null
+++ b/src/basic/missing_resource.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/resource.h>
+
+#ifndef RLIMIT_RTTIME
+#define RLIMIT_RTTIME 15
+#endif
+
+/* If RLIMIT_RTTIME is not defined, then we cannot use RLIMIT_NLIMITS as is */
+#define _RLIMIT_MAX (RLIMIT_RTTIME+1 > RLIMIT_NLIMITS ? RLIMIT_RTTIME+1 : RLIMIT_NLIMITS)
diff --git a/src/basic/missing_sched.h b/src/basic/missing_sched.h
new file mode 100644
index 0000000..baa3913
--- /dev/null
+++ b/src/basic/missing_sched.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sched.h>
+
+#ifndef CLONE_NEWCGROUP
+#define CLONE_NEWCGROUP 0x02000000
+#endif
+
+/* Not exposed yet. Defined at include/linux/sched.h */
+#ifndef PF_KTHREAD
+#define PF_KTHREAD 0x00200000
+#endif
+
+/* The maximum thread/process name length including trailing NUL byte. This mimics the kernel definition of the same
+ * name, which we need in userspace at various places but is not defined in userspace currently, neither under this
+ * name nor any other. */
+/* Not exposed yet. Defined at include/linux/sched.h */
+#ifndef TASK_COMM_LEN
+#define TASK_COMM_LEN 16
+#endif
diff --git a/src/basic/missing_securebits.h b/src/basic/missing_securebits.h
new file mode 100644
index 0000000..40d6ec9
--- /dev/null
+++ b/src/basic/missing_securebits.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+#include <linux/securebits.h>
+
+/* 746bf6d64275be0c65b0631d8a72b16f1454cfa1 (4.3) */
+#ifndef SECURE_NO_CAP_AMBIENT_RAISE
+#define SECURE_NO_CAP_AMBIENT_RAISE 6
+#define SECURE_NO_CAP_AMBIENT_RAISE_LOCKED 7 /* make bit-6 immutable */
+#define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
+#define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED))
+
+#undef SECURE_ALL_BITS
+#define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \
+ issecure_mask(SECURE_NO_SETUID_FIXUP) | \
+ issecure_mask(SECURE_KEEP_CAPS) | \
+ issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
+#endif
diff --git a/src/basic/missing_socket.h b/src/basic/missing_socket.h
new file mode 100644
index 0000000..a5fd457
--- /dev/null
+++ b/src/basic/missing_socket.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/socket.h>
+
+#if HAVE_LINUX_VM_SOCKETS_H
+#include <linux/vm_sockets.h>
+#else /* taken when HAVE_LINUX_VM_SOCKETS_H is 0 or undefined: supply VMADDR_CID_ANY and struct sockaddr_vm locally */
+#define VMADDR_CID_ANY -1U
+struct sockaddr_vm {
+ unsigned short svm_family;
+ unsigned short svm_reserved1;
+ unsigned int svm_port;
+ unsigned int svm_cid;
+ unsigned char svm_zero[sizeof(struct sockaddr) - /* array length: sizeof(struct sockaddr) minus the sizes of the four members above */
+ sizeof(unsigned short) -
+ sizeof(unsigned short) -
+ sizeof(unsigned int) -
+ sizeof(unsigned int)];
+};
+#endif /* !HAVE_LINUX_VM_SOCKETS_H */
+
+#ifndef AF_VSOCK
+#define AF_VSOCK 40
+#endif
+
+#ifndef SO_REUSEPORT
+#define SO_REUSEPORT 15
+#endif
+
+#ifndef SO_PEERGROUPS
+#define SO_PEERGROUPS 59
+#endif
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+#ifndef SOL_ALG
+#define SOL_ALG 279
+#endif
+
+/* Not exposed yet. Defined in include/linux/socket.h. */
+#ifndef SOL_SCTP
+#define SOL_SCTP 132
+#endif
+
+/* Not exposed yet. Defined in include/linux/socket.h */
+#ifndef SCM_SECURITY
+#define SCM_SECURITY 0x03
+#endif
+
+/* netinet/in.h */
+#ifndef IP_FREEBIND
+#define IP_FREEBIND 15
+#endif
+
+#ifndef IP_TRANSPARENT
+#define IP_TRANSPARENT 19
+#endif
diff --git a/src/basic/missing_stat.h b/src/basic/missing_stat.h
new file mode 100644
index 0000000..5116206
--- /dev/null
+++ b/src/basic/missing_stat.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/types.h>
+#include <sys/stat.h>
+
+#if WANT_LINUX_STAT_H
+#include <linux/stat.h>
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#if !HAVE_STRUCT_STATX
+struct statx_timestamp {
+ __s64 tv_sec;
+ __u32 tv_nsec;
+ __s32 __reserved;
+};
+struct statx {
+ __u32 stx_mask;
+ __u32 stx_blksize;
+ __u64 stx_attributes;
+ __u32 stx_nlink;
+ __u32 stx_uid;
+ __u32 stx_gid;
+ __u16 stx_mode;
+ __u16 __spare0[1];
+ __u64 stx_ino;
+ __u64 stx_size;
+ __u64 stx_blocks;
+ __u64 stx_attributes_mask;
+ struct statx_timestamp stx_atime;
+ struct statx_timestamp stx_btime;
+ struct statx_timestamp stx_ctime;
+ struct statx_timestamp stx_mtime;
+ __u32 stx_rdev_major;
+ __u32 stx_rdev_minor;
+ __u32 stx_dev_major;
+ __u32 stx_dev_minor;
+ __u64 __spare2[14];
+};
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_BTIME
+#define STATX_BTIME 0x00000800U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef AT_STATX_DONT_SYNC
+#define AT_STATX_DONT_SYNC 0x4000
+#endif
diff --git a/src/basic/missing_stdlib.h b/src/basic/missing_stdlib.h
new file mode 100644
index 0000000..188a8d4
--- /dev/null
+++ b/src/basic/missing_stdlib.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdlib.h>
+
+/* stdlib.h */
+#if !HAVE_SECURE_GETENV
+# if HAVE___SECURE_GETENV
+# define secure_getenv __secure_getenv
+# else
+# error "neither secure_getenv nor __secure_getenv are available"
+# endif
+#endif
diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h
new file mode 100644
index 0000000..d1aa322
--- /dev/null
+++ b/src/basic/missing_syscall.h
@@ -0,0 +1,446 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* Missing glibc definitions to access certain kernel APIs */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifdef ARCH_MIPS
+#include <asm/sgidefs.h>
+#endif
+
+#include "missing_keyctl.h"
+#include "missing_stat.h"
+
+/* linux/kcmp.h */
+#ifndef KCMP_FILE /* 3f4994cfc15f38a3159c6e3a4b3ab2e1481a6b02 (3.19) */
+#define KCMP_FILE 0
+#endif
+
+#if !HAVE_PIVOT_ROOT
+/* Invokes the pivot_root system call directly through syscall(); compiled only when HAVE_PIVOT_ROOT is
+ * false. The result of syscall() is handed back unchanged (converted to int). */
+static inline int missing_pivot_root(const char *new_root, const char *put_old) {
+        long ret;
+
+        ret = syscall(__NR_pivot_root, new_root, put_old);
+        return ret;
+}
+
+#  define pivot_root missing_pivot_root
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_MEMFD_CREATE
+/* Per-architecture fallback numbers, consulted only when the system headers lack __NR_memfd_create. */
+#  ifndef __NR_memfd_create
+#    if defined(__x86_64__)
+#      define __NR_memfd_create 319
+#    elif defined(__arm__)
+#      define __NR_memfd_create 385
+#    elif defined(__aarch64__)
+#      define __NR_memfd_create 279
+#    elif defined(__s390__)
+#      define __NR_memfd_create 350
+#    elif defined(_MIPS_SIM)
+#      if _MIPS_SIM == _MIPS_SIM_ABI32
+#        define __NR_memfd_create 4354
+#      endif
+#      if _MIPS_SIM == _MIPS_SIM_NABI32
+#        define __NR_memfd_create 6318
+#      endif
+#      if _MIPS_SIM == _MIPS_SIM_ABI64
+#        define __NR_memfd_create 5314
+#      endif
+#    elif defined(__i386__)
+#      define __NR_memfd_create 356
+#    elif defined(__arc__)
+#      define __NR_memfd_create 279
+#    else
+#      warning "__NR_memfd_create unknown for your architecture"
+#    endif
+#  endif
+
+/* Replacement for memfd_create(): issues the raw system call when its number is known; otherwise sets errno
+ * to ENOSYS and returns -1. */
+static inline int missing_memfd_create(const char *name, unsigned int flags) {
+#  ifndef __NR_memfd_create
+        errno = ENOSYS;
+        return -1;
+#  else
+        return syscall(__NR_memfd_create, name, flags);
+#  endif
+}
+
+#  define memfd_create missing_memfd_create
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_GETRANDOM
+/* Per-architecture fallback numbers, consulted only when the system headers lack __NR_getrandom. */
+#  ifndef __NR_getrandom
+#    if defined(__x86_64__)
+#      define __NR_getrandom 318
+#    elif defined(__i386__)
+#      define __NR_getrandom 355
+#    elif defined(__arm__)
+#      define __NR_getrandom 384
+#    elif defined(__aarch64__)
+#      define __NR_getrandom 278
+#    elif defined(__ia64__)
+#      define __NR_getrandom 1339
+#    elif defined(__m68k__)
+#      define __NR_getrandom 352
+#    elif defined(__s390x__)
+#      define __NR_getrandom 349
+#    elif defined(__powerpc__)
+#      define __NR_getrandom 359
+#    elif defined(_MIPS_SIM)
+#      if _MIPS_SIM == _MIPS_SIM_ABI32
+#        define __NR_getrandom 4353
+#      endif
+#      if _MIPS_SIM == _MIPS_SIM_NABI32
+#        define __NR_getrandom 6317
+#      endif
+#      if _MIPS_SIM == _MIPS_SIM_ABI64
+#        define __NR_getrandom 5313
+#      endif
+#    elif defined(__arc__)
+#      define __NR_getrandom 278
+#    else
+#      warning "__NR_getrandom unknown for your architecture"
+#    endif
+#  endif
+
+/* Replacement for getrandom(): issues the raw system call when its number is known; otherwise sets errno to
+ * ENOSYS and returns -1. */
+static inline int missing_getrandom(void *buffer, size_t count, unsigned flags) {
+#  ifndef __NR_getrandom
+        errno = ENOSYS;
+        return -1;
+#  else
+        return syscall(__NR_getrandom, buffer, count, flags);
+#  endif
+}
+
+#  define getrandom missing_getrandom
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_GETTID
+/* Replacement for gettid(): forwards to the raw gettid system call and casts the result to pid_t. */
+static inline pid_t missing_gettid(void) {
+        long tid = syscall(__NR_gettid);
+
+        return (pid_t) tid;
+}
+
+#  define gettid missing_gettid
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_NAME_TO_HANDLE_AT
+/* Per-architecture fallback numbers; unlike most tables in this file an unknown architecture is a hard
+ * build error here. */
+#  ifndef __NR_name_to_handle_at
+#    if defined(__x86_64__)
+#      define __NR_name_to_handle_at 303
+#    elif defined(__i386__)
+#      define __NR_name_to_handle_at 341
+#    elif defined(__arm__)
+#      define __NR_name_to_handle_at 370
+#    elif defined(__powerpc__)
+#      define __NR_name_to_handle_at 345
+#    elif defined(__arc__)
+#      define __NR_name_to_handle_at 264
+#    else
+#      error "__NR_name_to_handle_at is not defined"
+#    endif
+#  endif
+
+/* Local declaration of the handle structure passed to name_to_handle_at(); f_handle is the variable-length
+ * trailing payload. */
+struct file_handle {
+        unsigned int handle_bytes;
+        int handle_type;
+        unsigned char f_handle[0];
+};
+
+/* Replacement for name_to_handle_at(): issues the raw system call when its number is known; otherwise sets
+ * errno to ENOSYS and returns -1. */
+static inline int missing_name_to_handle_at(int fd, const char *name, struct file_handle *handle, int *mnt_id, int flags) {
+#  ifndef __NR_name_to_handle_at
+        errno = ENOSYS;
+        return -1;
+#  else
+        return syscall(__NR_name_to_handle_at, fd, name, handle, mnt_id, flags);
+#  endif
+}
+
+#  define name_to_handle_at missing_name_to_handle_at
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_SETNS
+/* Per-architecture fallback numbers; an unknown architecture is a hard build error. */
+#  ifndef __NR_setns
+#    if defined(__x86_64__)
+#      define __NR_setns 308
+#    elif defined(__i386__)
+#      define __NR_setns 346
+#    elif defined(__arc__)
+#      define __NR_setns 268
+#    else
+#      error "__NR_setns is not defined"
+#    endif
+#  endif
+
+/* Replacement for setns(): issues the raw system call when its number is known; otherwise sets errno to
+ * ENOSYS and returns -1. */
+static inline int missing_setns(int fd, int nstype) {
+#  ifndef __NR_setns
+        errno = ENOSYS;
+        return -1;
+#  else
+        return syscall(__NR_setns, fd, nstype);
+#  endif
+}
+
+#  define setns missing_setns
+#endif
+
+/* ======================================================================= */
+
+/* Obtains the process ID straight from the kernel through syscall(), using __NR_getxpid on Alpha and
+ * __NR_getpid everywhere else. */
+static inline pid_t raw_getpid(void) {
+#if !defined(__alpha__)
+        return (pid_t) syscall(__NR_getpid);
+#else
+        return (pid_t) syscall(__NR_getxpid);
+#endif
+}
+
+/* ======================================================================= */
+
+#if !HAVE_RENAMEAT2
+/* Per-architecture fallback numbers, consulted only when the system headers lack __NR_renameat2. */
+#  ifndef __NR_renameat2
+#    if defined(__x86_64__)
+#      define __NR_renameat2 316
+#    elif defined(__arm__)
+#      define __NR_renameat2 382
+#    elif defined(__aarch64__)
+#      define __NR_renameat2 276
+#    elif defined(_MIPS_SIM)
+#      if _MIPS_SIM == _MIPS_SIM_ABI32
+#        define __NR_renameat2 4351
+#      endif
+#      if _MIPS_SIM == _MIPS_SIM_NABI32
+#        define __NR_renameat2 6315
+#      endif
+#      if _MIPS_SIM == _MIPS_SIM_ABI64
+#        define __NR_renameat2 5311
+#      endif
+#    elif defined(__i386__)
+#      define __NR_renameat2 353
+#    elif defined(__powerpc64__)
+#      define __NR_renameat2 357
+#    elif defined(__s390__) || defined(__s390x__)
+#      define __NR_renameat2 347
+#    elif defined(__arc__)
+#      define __NR_renameat2 276
+#    else
+#      warning "__NR_renameat2 unknown for your architecture"
+#    endif
+#  endif
+
+/* Replacement for renameat2(): issues the raw system call when its number is known; otherwise sets errno to
+ * ENOSYS and returns -1. */
+static inline int missing_renameat2(int oldfd, const char *oldname, int newfd, const char *newname, unsigned flags) {
+#  ifndef __NR_renameat2
+        errno = ENOSYS;
+        return -1;
+#  else
+        return syscall(__NR_renameat2, oldfd, oldname, newfd, newname, flags);
+#  endif
+}
+
+#  define renameat2 missing_renameat2
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_KCMP
+/* Replacement for kcmp(): issues the raw system call when __NR_kcmp is available; otherwise sets errno to
+ * ENOSYS and returns -1. No fallback number table is provided for this call. */
+static inline int missing_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) {
+#  ifndef __NR_kcmp
+        errno = ENOSYS;
+        return -1;
+#  else
+        return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2);
+#  endif
+}
+
+#  define kcmp missing_kcmp
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_KEYCTL
+/* Replacement for keyctl(): issues the raw system call when __NR_keyctl is available; otherwise sets errno
+ * to ENOSYS and returns -1. */
+static inline long missing_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) {
+#  ifdef __NR_keyctl
+        return syscall(__NR_keyctl, cmd, arg2, arg3, arg4, arg5);
+#  else
+        errno = ENOSYS;
+        return -1;
+#  endif
+}
+
+/* The alias is placed after the function, as for every other wrapper in this header, instead of inside the
+ * function body. */
+#  define keyctl missing_keyctl
+
+/* Replacement for add_key(): issues the raw system call when __NR_add_key is available; otherwise sets errno
+ * to ENOSYS and returns -1. */
+static inline key_serial_t missing_add_key(const char *type, const char *description, const void *payload, size_t plen, key_serial_t ringid) {
+#  ifdef __NR_add_key
+        return syscall(__NR_add_key, type, description, payload, plen, ringid);
+#  else
+        errno = ENOSYS;
+        return -1;
+#  endif
+}
+
+#  define add_key missing_add_key
+
+/* Replacement for request_key(): issues the raw system call when __NR_request_key is available; otherwise
+ * sets errno to ENOSYS and returns -1. */
+static inline key_serial_t missing_request_key(const char *type, const char *description, const char * callout_info, key_serial_t destringid) {
+#  ifdef __NR_request_key
+        return syscall(__NR_request_key, type, description, callout_info, destringid);
+#  else
+        errno = ENOSYS;
+        return -1;
+#  endif
+}
+
+#  define request_key missing_request_key
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_COPY_FILE_RANGE
+/* Per-architecture fallback numbers, consulted only when the system headers lack __NR_copy_file_range. */
+#  ifndef __NR_copy_file_range
+#    if defined(__x86_64__)
+#      define __NR_copy_file_range 326
+#    elif defined(__i386__)
+#      define __NR_copy_file_range 377
+#    elif defined(__s390__)
+#      define __NR_copy_file_range 375
+#    elif defined(__arm__)
+#      define __NR_copy_file_range 391
+#    elif defined(__aarch64__)
+#      define __NR_copy_file_range 285
+#    elif defined(__powerpc__)
+#      define __NR_copy_file_range 379
+#    elif defined(__arc__)
+#      define __NR_copy_file_range 285
+#    else
+#      warning "__NR_copy_file_range not defined for your architecture"
+#    endif
+#  endif
+
+/* Replacement for copy_file_range(): issues the raw system call when its number is known; otherwise sets
+ * errno to ENOSYS and returns -1. */
+static inline ssize_t missing_copy_file_range(
+                int fd_in, loff_t *off_in,
+                int fd_out, loff_t *off_out,
+                size_t len,
+                unsigned int flags) {
+#  ifndef __NR_copy_file_range
+        errno = ENOSYS;
+        return -1;
+#  else
+        return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out, len, flags);
+#  endif
+}
+
+#  define copy_file_range missing_copy_file_range
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_BPF
+/* Per-architecture fallback numbers, consulted only when the system headers lack __NR_bpf. */
+#  ifndef __NR_bpf
+#    if defined(__i386__)
+#      define __NR_bpf 357
+#    elif defined(__x86_64__)
+#      define __NR_bpf 321
+#    elif defined(__aarch64__)
+#      define __NR_bpf 280
+#    elif defined(__arm__)
+#      define __NR_bpf 386
+#    elif defined(__sparc__)
+#      define __NR_bpf 349
+#    elif defined(__s390__)
+#      define __NR_bpf 351
+#    elif defined(__tilegx__)
+#      define __NR_bpf 280
+#    else
+#      warning "__NR_bpf not defined for your architecture"
+#    endif
+#  endif
+
+/* Only a pointer to the attribute union is passed through, so a forward declaration suffices here. */
+union bpf_attr;
+
+/* Replacement for bpf(): issues the raw system call when its number is known; otherwise sets errno to
+ * ENOSYS and returns -1. */
+static inline int missing_bpf(int cmd, union bpf_attr *attr, size_t size) {
+#  ifndef __NR_bpf
+        errno = ENOSYS;
+        return -1;
+#  else
+        return (int) syscall(__NR_bpf, cmd, attr, size);
+#  endif
+}
+
+#  define bpf missing_bpf
+#endif
+
+/* ======================================================================= */
+
+/* Fallback numbers for pkey_mprotect, defined only when the system headers lack __NR_pkey_mprotect and
+ * __IGNORE_pkey_mprotect is not set. No wrapper function is provided here, only the number. */
+#ifndef __IGNORE_pkey_mprotect
+#  ifndef __NR_pkey_mprotect
+#    if defined __i386__
+#      define __NR_pkey_mprotect 380
+#    elif defined __x86_64__
+#      define __NR_pkey_mprotect 329
+#    elif defined __arm__
+#      define __NR_pkey_mprotect 394
+#    elif defined __aarch64__
+/* aarch64 uses the generic syscall table, where pkey_mprotect is 288 (394 is the 32-bit ARM number). */
+#      define __NR_pkey_mprotect 288
+#    elif defined __powerpc__
+#      define __NR_pkey_mprotect 386
+#    elif defined _MIPS_SIM
+#      if _MIPS_SIM == _MIPS_SIM_ABI32
+#        define __NR_pkey_mprotect 4363
+#      endif
+#      if _MIPS_SIM == _MIPS_SIM_NABI32
+#        define __NR_pkey_mprotect 6327
+#      endif
+#      if _MIPS_SIM == _MIPS_SIM_ABI64
+#        define __NR_pkey_mprotect 5323
+#      endif
+#    else
+#      warning "__NR_pkey_mprotect not defined for your architecture"
+#    endif
+#  endif
+#endif
+
+/* ======================================================================= */
+
+/* Fallback numbers for statx, consulted only when HAVE_STATX is false and the system headers lack
+ * __NR_statx. */
+#if !HAVE_STATX
+#  ifndef __NR_statx
+#    if defined __aarch64__
+/* aarch64 uses the generic syscall table, where statx is 291; 397 is the 32-bit ARM number only. */
+#      define __NR_statx 291
+#    elif defined __arm__
+#      define __NR_statx 397
+#    elif defined __alpha__
+#      define __NR_statx 522
+#    elif defined __i386__ || defined __powerpc64__
+#      define __NR_statx 383
+#    elif defined __sparc__
+#      define __NR_statx 360
+#    elif defined __x86_64__
+#      define __NR_statx 332
+#    else
+#      warning "__NR_statx not defined for your architecture"
+#    endif
+#  endif
+
+/* Forward declaration so that the typedef and wrapper following this block can name the type. */
+struct statx;
+#endif
+
+/* This typedef is supposed to be always defined. */
+typedef struct statx struct_statx;
+
+#if !HAVE_STATX
+/* Replacement for statx(): issues the raw system call when its number is known; otherwise sets errno to
+ * ENOSYS and returns -1. */
+static inline ssize_t missing_statx(int dfd, const char *filename, unsigned flags, unsigned int mask, struct statx *buffer) {
+#  ifndef __NR_statx
+        errno = ENOSYS;
+        return -1;
+#  else
+        return syscall(__NR_statx, dfd, filename, flags, mask, buffer);
+#  endif
+}
+
+#  define statx missing_statx
+#endif
diff --git a/src/basic/missing_timerfd.h b/src/basic/missing_timerfd.h
new file mode 100644
index 0000000..6b04044
--- /dev/null
+++ b/src/basic/missing_timerfd.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/timerfd.h>
+
+/* Fallback definition, used only when <sys/timerfd.h> does not provide TFD_TIMER_CANCEL_ON_SET. */
+#ifndef TFD_TIMER_CANCEL_ON_SET
+#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
+#endif
diff --git a/src/basic/missing_type.h b/src/basic/missing_type.h
new file mode 100644
index 0000000..bf8a6ca
--- /dev/null
+++ b/src/basic/missing_type.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <uchar.h>
+
+/* Fallback: map char32_t onto uint32_t when HAVE_CHAR32_T is false.
+ * NOTE(review): this header does not include <stdint.h> itself; users of the macro presumably get uint32_t
+ * from elsewhere — confirm. */
+#if !HAVE_CHAR32_T
+#define char32_t uint32_t
+#endif
+
+/* Fallback: map char16_t onto uint16_t when HAVE_CHAR16_T is false. */
+#if !HAVE_CHAR16_T
+#define char16_t uint16_t
+#endif
diff --git a/src/basic/missing_vxcan.h b/src/basic/missing_vxcan.h
new file mode 100644
index 0000000..be430f7
--- /dev/null
+++ b/src/basic/missing_vxcan.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* Local copy of the VXCAN_INFO_* enumeration, compiled only when HAVE_LINUX_CAN_VXCAN_H is false.
+ * VXCAN_INFO_MAX is derived from the sentinel so it always names the last real enumerator. */
+#if !HAVE_LINUX_CAN_VXCAN_H /* linux@a8f820a380a2a06fc4fe1a54159067958f800929 (4.12) */
+enum {
+ VXCAN_INFO_UNSPEC,
+ VXCAN_INFO_PEER,
+
+ __VXCAN_INFO_MAX
+#define VXCAN_INFO_MAX (__VXCAN_INFO_MAX - 1)
+};
+#endif
diff --git a/src/basic/mkdir-label.c b/src/basic/mkdir-label.c
new file mode 100644
index 0000000..0eba7fc
--- /dev/null
+++ b/src/basic/mkdir-label.c
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "label.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+
+/* Creates a directory at `path` with `mode`: first calls mac_selinux_create_file_prepare() for an S_IFDIR
+ * entry, then mkdir_errno_wrapper(), then always mac_selinux_create_file_clear(), and finally
+ * mac_smack_fix() on the new directory. Returns the first negative result encountered, otherwise whatever
+ * mac_smack_fix() returns. */
+int mkdir_label(const char *path, mode_t mode) {
+        int ret;
+
+        assert(path);
+
+        ret = mac_selinux_create_file_prepare(path, S_IFDIR);
+        if (ret >= 0) {
+                ret = mkdir_errno_wrapper(path, mode);
+                /* The clear call is needed whether or not the mkdir itself succeeded. */
+                mac_selinux_create_file_clear();
+        }
+        if (ret < 0)
+                return ret;
+
+        return mac_smack_fix(path, 0);
+}
+
+/* Same as mkdir_label(), but `path` is interpreted relative to the directory file descriptor `dirfd`, using
+ * the *_at variants of the helpers. */
+int mkdirat_label(int dirfd, const char *path, mode_t mode) {
+        int ret;
+
+        assert(path);
+
+        ret = mac_selinux_create_file_prepare_at(dirfd, path, S_IFDIR);
+        if (ret >= 0) {
+                ret = mkdirat_errno_wrapper(dirfd, path, mode);
+                /* The clear call is needed whether or not the mkdirat itself succeeded. */
+                mac_selinux_create_file_clear();
+        }
+        if (ret < 0)
+                return ret;
+
+        return mac_smack_fix_at(dirfd, path, 0);
+}
+
+/* Variant of mkdir_safe() that creates the directory through mkdir_label(). */
+int mkdir_safe_label(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) {
+ return mkdir_safe_internal(path, mode, uid, gid, flags, mkdir_label);
+}
+
+/* Variant of mkdir_parents() that creates each directory through mkdir_label(); no prefix is applied. */
+int mkdir_parents_label(const char *path, mode_t mode) {
+ return mkdir_parents_internal(NULL, path, mode, mkdir_label);
+}
+
+/* Variant of mkdir_p() that creates each directory through mkdir_label(); no prefix is applied. */
+int mkdir_p_label(const char *path, mode_t mode) {
+ return mkdir_p_internal(NULL, path, mode, mkdir_label);
+}
diff --git a/src/basic/mkdir.c b/src/basic/mkdir.c
new file mode 100644
index 0000000..4bb65d5
--- /dev/null
+++ b/src/basic/mkdir.c
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "user-util.h"
+
+/* Creates `path` via `_mkdir` and then verifies that what exists there is acceptable.
+ *
+ * If the creation succeeds, mode and ownership are applied with chmod_and_chown(). Regardless of whether the
+ * creation succeeded, the path is then lstat()ed. With MKDIR_FOLLOW_SYMLINK a symlink is resolved via
+ * chase_symlinks(); if the target does not exist yet, the function recurses on the resolved path with the
+ * flag cleared.
+ *
+ * Returns 0 on success, -ENOTDIR if the path is not a directory, -EEXIST if an existing directory is more
+ * permissive than `mode` in any permission class or is owned by a different user/group than requested
+ * (UID_INVALID/GID_INVALID disable the respective check), or another negative errno-style error.
+ * MKDIR_WARN_MODE raises the log level of the refusal messages from debug to warning. */
+int mkdir_safe_internal(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir) {
+        struct stat st;
+        int r;
+
+        /* Callers must pass a wrapper returning negative errno, never libc's mkdir() itself. */
+        assert(_mkdir != mkdir);
+
+        if (_mkdir(path, mode) >= 0) {
+                r = chmod_and_chown(path, mode, uid, gid);
+                if (r < 0)
+                        return r;
+        }
+
+        if (lstat(path, &st) < 0)
+                return -errno;
+
+        if ((flags & MKDIR_FOLLOW_SYMLINK) && S_ISLNK(st.st_mode)) {
+                _cleanup_free_ char *p = NULL;
+
+                r = chase_symlinks(path, NULL, CHASE_NONEXISTENT, &p);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        return mkdir_safe_internal(p, mode, uid, gid,
+                                                   flags & ~MKDIR_FOLLOW_SYMLINK,
+                                                   _mkdir);
+
+                if (lstat(p, &st) < 0)
+                        return -errno;
+        }
+
+        if (!S_ISDIR(st.st_mode)) {
+                log_full(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG,
+                         "Path \"%s\" already exists and is not a directory, refusing.", path);
+                return -ENOTDIR;
+        }
+        /* Compare each permission class (other, group, user) separately. */
+        if ((st.st_mode & 0007) > (mode & 0007) ||
+            (st.st_mode & 0070) > (mode & 0070) ||
+            (st.st_mode & 0700) > (mode & 0700)) {
+                log_full(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG,
+                         "Directory \"%s\" already exists, but has mode %04o that is too permissive (%04o was requested), refusing.",
+                         path, st.st_mode & 0777, mode);
+                return -EEXIST;
+        }
+        if ((uid != UID_INVALID && st.st_uid != uid) ||
+            (gid != GID_INVALID && st.st_gid != gid)) {
+                char u[DECIMAL_STR_MAX(uid_t)] = "-", g[DECIMAL_STR_MAX(gid_t)] = "-";
+
+                if (uid != UID_INVALID)
+                        xsprintf(u, UID_FMT, uid);
+                /* Compare the group ID against GID_INVALID, matching the ownership check above. */
+                if (gid != GID_INVALID)
+                        xsprintf(g, GID_FMT, gid);
+                log_full(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG,
+                         "Directory \"%s\" already exists, but is owned by "UID_FMT":"GID_FMT" (%s:%s was requested), refusing.",
+                         path, st.st_uid, st.st_gid, u, g);
+                return -EEXIST;
+        }
+
+        return 0;
+}
+
+/* Calls mkdir() and converts its failure convention: returns 0 on success, -errno on failure. */
+int mkdir_errno_wrapper(const char *pathname, mode_t mode) {
+        return mkdir(pathname, mode) < 0 ? -errno : 0;
+}
+
+/* Calls mkdirat() and converts its failure convention: returns 0 on success, -errno on failure. */
+int mkdirat_errno_wrapper(int dirfd, const char *pathname, mode_t mode) {
+        return mkdirat(dirfd, pathname, mode) < 0 ? -errno : 0;
+}
+
+/* mkdir_safe_internal() with plain mkdir() (through mkdir_errno_wrapper()) as the creation function. */
+int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) {
+ return mkdir_safe_internal(path, mode, uid, gid, flags, mkdir_errno_wrapper);
+}
+
+/* Creates every directory leading up to `path`, but not the last component itself, calling `_mkdir` for each
+ * level.
+ *
+ * If `prefix` is non-NULL, `path` must start with it (otherwise -ENOTDIR), and levels for which
+ * path_startswith(prefix, level) holds are skipped rather than created.
+ *
+ * Returns 0 on success or if the parent already is a directory, -EINVAL if `path` contains no slash,
+ * -ENOTDIR if the parent exists but is not a directory, or the negative error from `_mkdir` (-EEXIST from
+ * intermediate levels is ignored). */
+int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir) {
+ const char *p, *e;
+ int r;
+
+ assert(path);
+ assert(_mkdir != mkdir);
+
+ if (prefix && !path_startswith(path, prefix))
+ return -ENOTDIR;
+
+ /* return immediately if directory exists */
+ e = strrchr(path, '/');
+ if (!e)
+ return -EINVAL;
+
+ /* The only slash is the leading one: the parent is the root directory, nothing to create. */
+ if (e == path)
+ return 0;
+
+ p = strndupa(path, e - path);
+ r = is_dir(p, true);
+ if (r > 0)
+ return 0;
+ if (r == 0)
+ return -ENOTDIR;
+
+ /* create every parent directory in the path, except the last component */
+ p = path + strspn(path, "/");
+ for (;;) {
+ char t[strlen(path) + 1];
+
+ /* e: end of the current component; p: start of the next one (extra slashes skipped). */
+ e = p + strcspn(p, "/");
+ p = e + strspn(e, "/");
+
+ /* Is this the last component? If so, then we're done */
+ if (*p == 0)
+ return 0;
+
+ /* t receives the path up to and including the current component. */
+ memcpy(t, path, e - path);
+ t[e-path] = 0;
+
+ if (prefix && path_startswith(prefix, t))
+ continue;
+
+ r = _mkdir(t, mode);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+}
+
+/* mkdir_parents_internal() without a prefix, using plain mkdir() through mkdir_errno_wrapper(). */
+int mkdir_parents(const char *path, mode_t mode) {
+ return mkdir_parents_internal(NULL, path, mode, mkdir_errno_wrapper);
+}
+
+/* Like "mkdir -p": creates all parents of `path` via mkdir_parents_internal() and then `path` itself via
+ * `_mkdir`. An -EEXIST from the final creation is tolerated only if is_dir() confirms that `path` is a
+ * directory. Returns 0 on success, a negative errno-style error otherwise. */
+int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir) {
+        int ret;
+
+        assert(_mkdir != mkdir);
+
+        ret = mkdir_parents_internal(prefix, path, mode, _mkdir);
+        if (ret < 0)
+                return ret;
+
+        ret = _mkdir(path, mode);
+        if (ret >= 0)
+                return 0;
+
+        /* Something is already there: fine as long as it is a directory. */
+        if (ret == -EEXIST && is_dir(path, true) > 0)
+                return 0;
+
+        return ret;
+}
+
+/* mkdir_p_internal() without a prefix, using plain mkdir() through mkdir_errno_wrapper(). */
+int mkdir_p(const char *path, mode_t mode) {
+ return mkdir_p_internal(NULL, path, mode, mkdir_errno_wrapper);
+}
diff --git a/src/basic/mkdir.h b/src/basic/mkdir.h
new file mode 100644
index 0000000..eb54853
--- /dev/null
+++ b/src/basic/mkdir.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+/* Bit flags accepted by the mkdir_safe*() family. */
+typedef enum MkdirFlags {
+ MKDIR_FOLLOW_SYMLINK = 1 << 0, /* if the path is a symlink, resolve it and check/create the target */
+ MKDIR_WARN_MODE = 1 << 1, /* log refusals at warning level instead of debug level */
+} MkdirFlags;
+
+/* mkdir()/mkdirat() wrappers that return -errno on failure instead of -1. */
+int mkdir_errno_wrapper(const char *pathname, mode_t mode);
+int mkdirat_errno_wrapper(int dirfd, const char *pathname, mode_t mode);
+int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
+int mkdir_parents(const char *path, mode_t mode);
+int mkdir_p(const char *path, mode_t mode);
+
+/* mandatory access control(MAC) versions */
+int mkdir_safe_label(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
+int mkdir_parents_label(const char *path, mode_t mode);
+int mkdir_p_label(const char *path, mode_t mode);
+
+/* internally used */
+/* Signature of the directory-creation callback handed to the *_internal() functions below. */
+typedef int (*mkdir_func_t)(const char *pathname, mode_t mode);
+int mkdir_safe_internal(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir);
+int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir);
+int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir);
diff --git a/src/basic/mountpoint-util.c b/src/basic/mountpoint-util.c
new file mode 100644
index 0000000..1e946a0
--- /dev/null
+++ b/src/basic/mountpoint-util.c
@@ -0,0 +1,444 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio_ext.h>
+#include <sys/mount.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "missing.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+
+/* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
+ * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
+ * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
+ * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
+ * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
+ * with large file handles anyway. */
+#define ORIGINAL_MAX_HANDLE_SZ 128
+
+/* Calls name_to_handle_at() with a growing buffer until the handle fits.
+ *
+ * On success returns 0; the allocated handle is stored in *ret_handle (ownership passes to the caller) and
+ * the mount ID in *ret_mnt_id. Either output pointer may be NULL. Returns -ENOMEM on allocation failure,
+ * -EOVERFLOW if the kernel reports EOVERFLOW without asking for a larger buffer, or -errno from the call. */
+int name_to_handle_at_loop(
+ int fd,
+ const char *path,
+ struct file_handle **ret_handle,
+ int *ret_mnt_id,
+ int flags) {
+
+ _cleanup_free_ struct file_handle *h = NULL;
+ size_t n = ORIGINAL_MAX_HANDLE_SZ;
+
+ /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
+ * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
+ * start value, it is not an upper bound on the buffer size required.
+ *
+ * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
+ * as NULL if there's no interest in either. */
+
+ for (;;) {
+ int mnt_id = -1;
+
+ h = malloc0(offsetof(struct file_handle, f_handle) + n);
+ if (!h)
+ return -ENOMEM;
+
+ h->handle_bytes = n;
+
+ if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
+
+ if (ret_handle)
+ *ret_handle = TAKE_PTR(h);
+
+ if (ret_mnt_id)
+ *ret_mnt_id = mnt_id;
+
+ return 0;
+ }
+ if (errno != EOVERFLOW)
+ return -errno;
+
+ if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
+
+ /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
+ * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
+ * be filled in, and the caller was interested in only the mount ID and nothing else. */
+
+ *ret_mnt_id = mnt_id;
+ return 0;
+ }
+
+ /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
+ * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
+ * buffer. In that case propagate EOVERFLOW */
+ if (h->handle_bytes <= n)
+ return -EOVERFLOW;
+
+ /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
+ n = h->handle_bytes;
+ if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
+ return -EOVERFLOW;
+
+ /* Release the too-small buffer before the next iteration allocates a bigger one. */
+ h = mfree(h);
+ }
+}
+
+/* Determines a mount ID by parsing the "mnt_id:" field of /proc/self/fdinfo/<fd>.
+ *
+ * If AT_EMPTY_PATH is set and `filename` is empty, `fd` itself is inspected. Otherwise `filename` is opened
+ * relative to `fd` with O_PATH (O_NOFOLLOW is added unless AT_SYMLINK_FOLLOW is set) and that descriptor is
+ * inspected instead.
+ *
+ * Returns the result of safe_atoi() on the field value (stored in *mnt_id), -EOPNOTSUPP if the fdinfo file
+ * or the mnt_id field is missing, or another negative errno-style error. */
+static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
+ char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
+ _cleanup_free_ char *fdinfo = NULL;
+ _cleanup_close_ int subfd = -1;
+ char *p;
+ int r;
+
+ if ((flags & AT_EMPTY_PATH) && isempty(filename))
+ xsprintf(path, "/proc/self/fdinfo/%i", fd);
+ else {
+ subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
+ if (subfd < 0)
+ return -errno;
+
+ xsprintf(path, "/proc/self/fdinfo/%i", subfd);
+ }
+
+ r = read_full_file(path, &fdinfo, NULL);
+ if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
+ return -EOPNOTSUPP;
+ if (r < 0)
+ return r;
+
+ /* The field is either the very first line or preceded by a newline. */
+ p = startswith(fdinfo, "mnt_id:");
+ if (!p) {
+ p = strstr(fdinfo, "\nmnt_id:");
+ if (!p) /* The mnt_id field is a relatively new addition */
+ return -EOPNOTSUPP;
+
+ /* Skip the 8 characters of "\nmnt_id:". */
+ p += 8;
+ }
+
+ /* Trim surrounding whitespace and cut the buffer off after the value. */
+ p += strspn(p, WHITESPACE);
+ p[strcspn(p, WHITESPACE)] = 0;
+
+ return safe_atoi(p, mnt_id);
+}
+
+/* Checks whether the entry `filename` inside the directory referenced by `fd` is a mount point.
+ *
+ * Returns > 0 if it is, 0 if it is not, and a negative errno-style error on failure. Three strategies are
+ * tried in order (name_to_handle_at(), /proc/self/fdinfo, fstatat()), as described in the comment below. */
+int fd_is_mount_point(int fd, const char *filename, int flags) {
+ _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
+ int mount_id = -1, mount_id_parent = -1;
+ bool nosupp = false, check_st_dev = true;
+ struct stat a, b;
+ int r;
+
+ assert(fd >= 0);
+ assert(filename);
+
+ /* First we will try the name_to_handle_at() syscall, which
+ * tells us the mount id and an opaque file "handle". It is
+ * not supported everywhere though (kernel compile-time
+ * option, not all file systems are hooked up). If it works
+ * the mount id is usually good enough to tell us whether
+ * something is a mount point.
+ *
+ * If that didn't work we will try to read the mount id from
+ * /proc/self/fdinfo/<fd>. This is almost as good as
+ * name_to_handle_at(), however, does not return the
+ * opaque file handle. The opaque file handle is pretty useful
+ * to detect the root directory, which we should always
+ * consider a mount point. Hence we use this only as
+ * fallback. Exporting the mnt_id in fdinfo is a pretty recent
+ * kernel addition.
+ *
+ * As last fallback we do traditional fstat() based st_dev
+ * comparisons. This is how things were traditionally done,
+ * but unionfs breaks this since it exposes file
+ * systems with a variety of st_dev reported. Also, btrfs
+ * subvolumes have different st_dev, even though they aren't
+ * real mounts of their own. */
+
+ r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
+ if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
+ /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
+ * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
+ * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
+ * (EINVAL): fall back to simpler logic. */
+ goto fallback_fdinfo;
+ else if (r == -EOPNOTSUPP)
+ /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
+ * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
+ * logic */
+ nosupp = true;
+ else if (r < 0)
+ return r;
+
+ /* Query the directory `fd` itself, i.e. the parent of `filename`. */
+ r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
+ if (r == -EOPNOTSUPP) {
+ if (nosupp)
+ /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
+ goto fallback_fdinfo;
+ else
+ /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
+ * it must be a mount point. */
+ return 1;
+ } else if (r < 0)
+ return r;
+
+ /* The parent can do name_to_handle_at() but the
+ * directory we are interested in can't? If so, it
+ * must be a mount point. */
+ if (nosupp)
+ return 1;
+
+ /* If the file handle for the directory we are
+ * interested in and its parent are identical, we
+ * assume this is the root directory, which is a mount
+ * point. */
+
+ if (h->handle_bytes == h_parent->handle_bytes &&
+ h->handle_type == h_parent->handle_type &&
+ memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
+ return 1;
+
+ return mount_id != mount_id_parent;
+
+fallback_fdinfo:
+ r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
+ if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
+ goto fallback_fstat;
+ if (r < 0)
+ return r;
+
+ r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
+ if (r < 0)
+ return r;
+
+ if (mount_id != mount_id_parent)
+ return 1;
+
+ /* Hmm, so, the mount ids are the same. This leaves one
+ * special case though for the root file system. For that,
+ * let's see if the parent directory has the same inode as we
+ * are interested in. Hence, let's also do fstat() checks now,
+ * too, but avoid the st_dev comparisons, since they aren't
+ * that useful on unionfs mounts. */
+ check_st_dev = false;
+
+fallback_fstat:
+ /* yay for fstatat() taking a different set of flags than the other
+ * _at() above */
+ if (flags & AT_SYMLINK_FOLLOW)
+ flags &= ~AT_SYMLINK_FOLLOW;
+ else
+ flags |= AT_SYMLINK_NOFOLLOW;
+ if (fstatat(fd, filename, &a, flags) < 0)
+ return -errno;
+
+ if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
+ return -errno;
+
+ /* A directory with same device and inode as its parent? Must
+ * be the root directory */
+ if (a.st_dev == b.st_dev &&
+ a.st_ino == b.st_ino)
+ return 1;
+
+ /* The device comparison only counts when we arrived here without usable mount IDs. */
+ return check_st_dev && (a.st_dev != b.st_dev);
+}
+
+/* Checks whether the path `t` is a mount point. flags can be AT_SYMLINK_FOLLOW or 0.
+ *
+ * "/" is always reported as a mount point. With AT_SYMLINK_FOLLOW the path is first resolved through
+ * chase_symlinks() relative to `root`. The parent directory is then opened and the decision is delegated to
+ * fd_is_mount_point(). Returns > 0 for a mount point, 0 otherwise, negative errno-style error on failure. */
+int path_is_mount_point(const char *t, const char *root, int flags) {
+        _cleanup_free_ char *canonical = NULL;
+        _cleanup_close_ int fd = -1;
+        int r;
+
+        assert(t);
+        assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
+
+        if (path_equal(t, "/"))
+                return 1;
+
+        /* we need to resolve symlinks manually, we can't just rely on
+         * fd_is_mount_point() to do that for us; if we have a structure like
+         * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
+         * look at needs to be /usr, not /. */
+        if (flags & AT_SYMLINK_FOLLOW) {
+                r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
+                if (r < 0)
+                        return r;
+
+                t = canonical;
+        }
+
+        fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
+        if (fd < 0)
+                /* NOTE(review): assumes open_parent() follows the negative-errno return convention used by the
+                 * other project helpers called here, so its result is propagated directly; errno may be stale
+                 * or unset at this point. Confirm against the open_parent() implementation. */
+                return fd;
+
+        return fd_is_mount_point(fd, last_path_component(t), flags);
+}
+
+int path_get_mnt_id(const char *path, int *ret) {
+ int r;
+
+ r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
+ if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
+ return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
+
+ return r;
+}
+
+/* Returns true if `fstype` names one of the file system types listed below. A leading "fuse." is stripped
+ * before the comparison, so e.g. "fuse.sshfs" is matched as "sshfs". */
+bool fstype_is_network(const char *fstype) {
+ const char *x;
+
+ x = startswith(fstype, "fuse.");
+ if (x)
+ fstype = x;
+
+ return STR_IN_SET(fstype,
+ "afs",
+ "cifs",
+ "smbfs",
+ "sshfs",
+ "ncpfs",
+ "ncp",
+ "nfs",
+ "nfs4",
+ "gfs",
+ "gfs2",
+ "glusterfs",
+ "pvfs2", /* OrangeFS */
+ "ocfs2",
+ "lustre");
+}
+
+/* Returns true if `fstype` exactly matches one of the file system type names listed below. */
+bool fstype_is_api_vfs(const char *fstype) {
+ return STR_IN_SET(fstype,
+ "autofs",
+ "bpf",
+ "cgroup",
+ "cgroup2",
+ "configfs",
+ "cpuset",
+ "debugfs",
+ "devpts",
+ "devtmpfs",
+ "efivarfs",
+ "fusectl",
+ "hugetlbfs",
+ "mqueue",
+ "proc",
+ "pstore",
+ "ramfs",
+ "securityfs",
+ "sysfs",
+ "tmpfs",
+ "tracefs");
+}
+
+/* Returns true if `fstype` exactly matches one of the names listed below. */
+bool fstype_is_ro(const char *fstype) {
+ /* All Linux file systems that are necessarily read-only */
+ return STR_IN_SET(fstype,
+ "DM_verity_hash",
+ "iso9660",
+ "squashfs");
+}
+
+/* Returns true if `fstype` exactly matches one of the names listed below.
+ * NOTE(review): presumably these are the file systems supporting the "discard" mount option — confirm. */
+bool fstype_can_discard(const char *fstype) {
+ return STR_IN_SET(fstype,
+ "btrfs",
+ "ext4",
+ "vfat",
+ "xfs");
+}
+
+/* Returns true if `fstype` exactly matches one of the names listed below. */
+bool fstype_can_uid_gid(const char *fstype) {
+
+ /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
+ * current and future. */
+
+ return STR_IN_SET(fstype,
+ "adfs",
+ "fat",
+ "hfs",
+ "hpfs",
+ "iso9660",
+ "msdos",
+ "ntfs",
+ "vfat");
+}
+
+/* Checks whether /dev is backed by devtmpfs: looks up the mount ID of "/dev" and then scans
+ * /proc/self/mountinfo for a line with that ID whose file system type starts with "devtmpfs".
+ * Returns true/false, or a negative errno-style error if the lookup or reading fails. */
+int dev_is_devtmpfs(void) {
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+ int mount_id, r;
+ char *e;
+
+ r = path_get_mnt_id("/dev", &mount_id);
+ if (r < 0)
+ return r;
+
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo)
+ return -errno;
+
+ (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ int mid;
+
+ r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ /* The line starts with the mount ID; lines that do not parse are skipped. */
+ if (sscanf(line, "%i", &mid) != 1)
+ continue;
+
+ if (mid != mount_id)
+ continue;
+
+ /* The file system type follows the " - " separator. */
+ e = strstr(line, " - ");
+ if (!e)
+ continue;
+
+ /* accept any name that starts with the currently expected type */
+ if (startswith(e + 3, "devtmpfs"))
+ return true;
+ }
+
+ return false;
+}
+
+/* Maps the propagation bits of `flags` (MS_SHARED, MS_SLAVE, MS_PRIVATE; all other bits are ignored) to a
+ * string: "" when none is set, "shared", "slave" or "private" when exactly one is set, and NULL for any
+ * combination of several. */
+const char *mount_propagation_flags_to_string(unsigned long flags) {
+        unsigned long propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);
+
+        if (propagation == 0)
+                return "";
+        if (propagation == MS_SHARED)
+                return "shared";
+        if (propagation == MS_SLAVE)
+                return "slave";
+        if (propagation == MS_PRIVATE)
+                return "private";
+
+        return NULL;
+}
+
+/* Inverse of mount_propagation_flags_to_string(): an empty or NULL-ish name (per isempty()) yields 0,
+ * "shared", "slave" and "private" yield the matching MS_* flag. Returns 0 on success with the value stored in
+ * *ret, or -EINVAL for any other name, in which case *ret is left untouched. */
+int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
+        unsigned long parsed;
+
+        if (isempty(name))
+                parsed = 0;
+        else if (streq(name, "shared"))
+                parsed = MS_SHARED;
+        else if (streq(name, "slave"))
+                parsed = MS_SLAVE;
+        else if (streq(name, "private"))
+                parsed = MS_PRIVATE;
+        else
+                return -EINVAL;
+
+        *ret = parsed;
+        return 0;
+}
diff --git a/src/basic/mountpoint-util.h b/src/basic/mountpoint-util.h
new file mode 100644
index 0000000..5398836
--- /dev/null
+++ b/src/basic/mountpoint-util.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+int name_to_handle_at_loop(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, int flags);
+
+int path_get_mnt_id(const char *path, int *ret);
+
+int fd_is_mount_point(int fd, const char *filename, int flags);
+int path_is_mount_point(const char *path, const char *root, int flags);
+
+/* Predicates on file system type names */
+bool fstype_is_network(const char *fstype);
+bool fstype_is_api_vfs(const char *fstype);
+bool fstype_is_ro(const char *fstype);
+bool fstype_can_discard(const char *fstype);
+bool fstype_can_uid_gid(const char *fstype);
+
+/* Returns true/false, or a negative errno-style value on failure */
+int dev_is_devtmpfs(void);
+
+/* Conversion between the MS_SHARED/MS_SLAVE/MS_PRIVATE flags and the names "shared", "slave", "private" */
+const char *mount_propagation_flags_to_string(unsigned long flags);
+int mount_propagation_flags_from_string(const char *name, unsigned long *ret);
diff --git a/src/basic/nss-util.h b/src/basic/nss-util.h
new file mode 100644
index 0000000..2045175
--- /dev/null
+++ b/src/basic/nss-util.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <grp.h>
+#include <netdb.h>
+#include <nss.h>
+#include <pwd.h>
+#include <resolv.h>
+
+/* Comma-separated list of signal numbers, usable as a run of call arguments. Presumably the signals to
+ * block while an NSS module does its work -- confirm against the users of this macro. */
+#define NSS_SIGNALS_BLOCK SIGALRM,SIGVTALRM,SIGPIPE,SIGCHLD,SIGTSTP,SIGIO,SIGHUP,SIGUSR1,SIGUSR2,SIGPROF,SIGURG,SIGWINCH
+
+/* Fallback for environments whose headers do not define DEPRECATED_RES_USE_INET6. The value is tested
+ * against _res.options below. Presumably it mirrors the C library's own definition -- verify. */
+#ifndef DEPRECATED_RES_USE_INET6
+# define DEPRECATED_RES_USE_INET6 0x00002000
+#endif
+
+/* Declares the _nss_<module>_gethostbyname4_r, ..3_r, ..2_r and .._r functions, each marked _public_.
+ * The expansion does not end in a semicolon; the user of the macro supplies it. */
+#define NSS_GETHOSTBYNAME_PROTOTYPES(module) \
+enum nss_status _nss_##module##_gethostbyname4_r( \
+ const char *name, \
+ struct gaih_addrtuple **pat, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop, \
+ int32_t *ttlp) _public_; \
+enum nss_status _nss_##module##_gethostbyname3_r( \
+ const char *name, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop, \
+ int32_t *ttlp, \
+ char **canonp) _public_; \
+enum nss_status _nss_##module##_gethostbyname2_r( \
+ const char *name, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) _public_; \
+enum nss_status _nss_##module##_gethostbyname_r( \
+ const char *name, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) _public_
+
+/* Declares the _nss_<module>_gethostbyaddr2_r and _nss_<module>_gethostbyaddr_r functions, each marked
+ * _public_. The expansion does not end in a semicolon; the user of the macro supplies it. */
+#define NSS_GETHOSTBYADDR_PROTOTYPES(module) \
+enum nss_status _nss_##module##_gethostbyaddr2_r( \
+ const void* addr, socklen_t len, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop, \
+ int32_t *ttlp) _public_; \
+enum nss_status _nss_##module##_gethostbyaddr_r( \
+ const void* addr, socklen_t len, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) _public_
+
+/* Defines _nss_<module>_gethostbyname2_r and _nss_<module>_gethostbyname_r on top of the module's
+ * gethostbyname3_r, always passing NULL for the ttlp and canonp arguments.
+ *
+ * The gethostbyname_r variant has no address family parameter: it asks for AF_INET6 only if
+ * DEPRECATED_RES_USE_INET6 is set in _res.options, and asks for AF_INET whenever the status so far is
+ * NSS_STATUS_NOTFOUND (which is also the initial value when AF_INET6 was not tried). */
+#define NSS_GETHOSTBYNAME_FALLBACKS(module) \
+enum nss_status _nss_##module##_gethostbyname2_r( \
+ const char *name, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) { \
+ return _nss_##module##_gethostbyname3_r( \
+ name, \
+ af, \
+ host, \
+ buffer, buflen, \
+ errnop, h_errnop, \
+ NULL, \
+ NULL); \
+} \
+enum nss_status _nss_##module##_gethostbyname_r( \
+ const char *name, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) { \
+ enum nss_status ret = NSS_STATUS_NOTFOUND; \
+ \
+ if (_res.options & DEPRECATED_RES_USE_INET6) \
+ ret = _nss_##module##_gethostbyname3_r( \
+ name, \
+ AF_INET6, \
+ host, \
+ buffer, buflen, \
+ errnop, h_errnop, \
+ NULL, \
+ NULL); \
+ if (ret == NSS_STATUS_NOTFOUND) \
+ ret = _nss_##module##_gethostbyname3_r( \
+ name, \
+ AF_INET, \
+ host, \
+ buffer, buflen, \
+ errnop, h_errnop, \
+ NULL, \
+ NULL); \
+ return ret; \
+}
+
+/* Defines _nss_<module>_gethostbyaddr_r as a thin wrapper around the module's gethostbyaddr2_r, passing
+ * NULL for the extra ttlp argument. */
+#define NSS_GETHOSTBYADDR_FALLBACKS(module) \
+enum nss_status _nss_##module##_gethostbyaddr_r( \
+ const void* addr, socklen_t len, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) { \
+ return _nss_##module##_gethostbyaddr2_r( \
+ addr, len, \
+ af, \
+ host, \
+ buffer, buflen, \
+ errnop, h_errnop, \
+ NULL); \
+}
+
+/* Declares the _nss_<module>_getpwnam_r and _nss_<module>_getpwuid_r functions, each marked _public_.
+ * The expansion does not end in a semicolon; the user of the macro supplies it. */
+#define NSS_GETPW_PROTOTYPES(module) \
+enum nss_status _nss_##module##_getpwnam_r( \
+ const char *name, \
+ struct passwd *pwd, \
+ char *buffer, size_t buflen, \
+ int *errnop) _public_; \
+enum nss_status _nss_##module##_getpwuid_r( \
+ uid_t uid, \
+ struct passwd *pwd, \
+ char *buffer, size_t buflen, \
+ int *errnop) _public_
+
+/* Declares the _nss_<module>_getgrnam_r and _nss_<module>_getgrgid_r functions, each marked _public_.
+ * The expansion does not end in a semicolon; the user of the macro supplies it. */
+#define NSS_GETGR_PROTOTYPES(module) \
+enum nss_status _nss_##module##_getgrnam_r( \
+ const char *name, \
+ struct group *gr, \
+ char *buffer, size_t buflen, \
+ int *errnop) _public_; \
+enum nss_status _nss_##module##_getgrgid_r( \
+ gid_t gid, \
+ struct group *gr, \
+ char *buffer, size_t buflen, \
+ int *errnop) _public_
+
+/* Function pointer types whose parameter lists match the gethostbyname*_r and gethostbyaddr*_r
+ * prototypes declared by the macros above. Presumably intended for calling into NSS modules through
+ * dynamically resolved symbols -- confirm against the users of these types. */
+typedef enum nss_status (*_nss_gethostbyname4_r_t)(
+ const char *name,
+ struct gaih_addrtuple **pat,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp);
+
+typedef enum nss_status (*_nss_gethostbyname3_r_t)(
+ const char *name,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp,
+ char **canonp);
+
+typedef enum nss_status (*_nss_gethostbyname2_r_t)(
+ const char *name,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop);
+
+typedef enum nss_status (*_nss_gethostbyname_r_t)(
+ const char *name,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop);
+
+typedef enum nss_status (*_nss_gethostbyaddr2_r_t)(
+ const void* addr, socklen_t len,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp);
+typedef enum nss_status (*_nss_gethostbyaddr_r_t)(
+ const void* addr, socklen_t len,
+ int af,
+ struct hostent *host,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop);
diff --git a/src/basic/ordered-set.c b/src/basic/ordered-set.c
new file mode 100644
index 0000000..ed9ba77
--- /dev/null
+++ b/src/basic/ordered-set.c
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "ordered-set.h"
+#include "strv.h"
+
+/* Puts p into the set and takes ownership of it in all cases: whenever ordered_set_put() does not
+ * return a positive value, p is freed here. The result of ordered_set_put() is passed through. */
+int ordered_set_consume(OrderedSet *s, void *p) {
+        int r = ordered_set_put(s, p);
+
+        if (r > 0)
+                return r;
+
+        free(p);
+        return r;
+}
+
+/* Adds a copy of the string p to the set. Returns -ENOMEM if the copy cannot be allocated. An -EEXIST
+ * result from ordered_set_consume() is reported as 0; any other result is passed through. */
+int ordered_set_put_strdup(OrderedSet *s, const char *p) {
+        char *copy;
+        int r;
+
+        assert(s);
+        assert(p);
+
+        copy = strdup(p);
+        if (!copy)
+                return -ENOMEM;
+
+        r = ordered_set_consume(s, copy);
+        return r == -EEXIST ? 0 : r;
+}
+
+/* Adds a copy of each string of the list l to the set. Stops at the first failure and returns its
+ * negative error; otherwise returns the sum of the per-string results of ordered_set_put_strdup(). */
+int ordered_set_put_strdupv(OrderedSet *s, char **l) {
+        int total = 0;
+        char **entry;
+
+        STRV_FOREACH(entry, l) {
+                int r;
+
+                r = ordered_set_put_strdup(s, *entry);
+                if (r < 0)
+                        return r;
+
+                total += r;
+        }
+
+        return total;
+}
diff --git a/src/basic/ordered-set.h b/src/basic/ordered-set.h
new file mode 100644
index 0000000..7cbb718
--- /dev/null
+++ b/src/basic/ordered-set.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "hashmap.h"
+
+/* OrderedSet is only ever declared, never defined: the inline helpers below cast it to OrderedHashmap
+ * and forward to the matching ordered_hashmap_*() call. */
+typedef struct OrderedSet OrderedSet;
+
+/* Allocates a new set by creating an ordered hashmap with the given hash ops and casting the result. */
+static inline OrderedSet* ordered_set_new(const struct hash_ops *ops) {
+        OrderedSet *s;
+
+        s = (OrderedSet*) ordered_hashmap_new(ops);
+        return s;
+}
+
+/* Makes sure *s points to a set: does nothing if it already does, otherwise allocates a new one with
+ * the given hash ops. Returns 0 on success and -ENOMEM if the allocation fails. */
+static inline int ordered_set_ensure_allocated(OrderedSet **s, const struct hash_ops *ops) {
+        OrderedSet *fresh;
+
+        if (*s)
+                return 0;
+
+        fresh = ordered_set_new(ops);
+        if (!fresh)
+                return -ENOMEM;
+
+        *s = fresh;
+        return 0;
+}
+
+/* Forwards to ordered_hashmap_free() and returns its result cast back to the set type. */
+static inline OrderedSet* ordered_set_free(OrderedSet *s) {
+        OrderedHashmap *h = (OrderedHashmap*) s;
+
+        return (OrderedSet*) ordered_hashmap_free(h);
+}
+
+/* Forwards to ordered_hashmap_free_free() and returns its result cast back to the set type. */
+static inline OrderedSet* ordered_set_free_free(OrderedSet *s) {
+        OrderedHashmap *h = (OrderedHashmap*) s;
+
+        return (OrderedSet*) ordered_hashmap_free_free(h);
+}
+
+/* Inserts p into the set by storing it in the underlying hashmap as both key and value. */
+static inline int ordered_set_put(OrderedSet *s, void *p) {
+        OrderedHashmap *h = (OrderedHashmap*) s;
+
+        return ordered_hashmap_put(h, p, p);
+}
+
+/* Forwards to ordered_hashmap_isempty() on the underlying hashmap. */
+static inline bool ordered_set_isempty(OrderedSet *s) {
+        OrderedHashmap *h = (OrderedHashmap*) s;
+
+        return ordered_hashmap_isempty(h);
+}
+
+/* Forwards to ordered_hashmap_iterate(), asking only for the value (the key output is passed as NULL). */
+static inline bool ordered_set_iterate(OrderedSet *s, Iterator *i, void **value) {
+        OrderedHashmap *h = (OrderedHashmap*) s;
+
+        return ordered_hashmap_iterate(h, i, value, NULL);
+}
+
+/* Forwards to ordered_hashmap_remove() with p as the key and returns whatever that call returns. */
+static inline void* ordered_set_remove(OrderedSet *s, void *p) {
+        OrderedHashmap *h = (OrderedHashmap*) s;
+
+        return ordered_hashmap_remove(h, p);
+}
+
+/* Forwards to ordered_hashmap_steal_first() and returns whatever that call returns. */
+static inline void* ordered_set_steal_first(OrderedSet *s) {
+        OrderedHashmap *h = (OrderedHashmap*) s;
+
+        return ordered_hashmap_steal_first(h);
+}
+
+/* Out-of-line helpers, implemented in ordered-set.c */
+int ordered_set_consume(OrderedSet *s, void *p);
+int ordered_set_put_strdup(OrderedSet *s, const char *p);
+int ordered_set_put_strdupv(OrderedSet *s, char **l);
+
+/* Loop header: resets the iterator i to ITERATOR_FIRST, then runs the body for as long as
+ * ordered_set_iterate() returns true, with e receiving the current entry. */
+#define ORDERED_SET_FOREACH(e, s, i) \
+ for ((i) = ITERATOR_FIRST; ordered_set_iterate((s), &(i), (void**)&(e)); )
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedSet*, ordered_set_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedSet*, ordered_set_free_free);
+
+/* Variable attributes that reference the ordered_set_freep / ordered_set_free_freep cleanup functions;
+ * presumably those are what the two DEFINE_TRIVIAL_CLEANUP_FUNC() lines above generate -- confirm. */
+#define _cleanup_ordered_set_free_ _cleanup_(ordered_set_freep)
+#define _cleanup_ordered_set_free_free_ _cleanup_(ordered_set_free_freep)
diff --git a/src/basic/parse-util.c b/src/basic/parse-util.c
new file mode 100644
index 0000000..87724af
--- /dev/null
+++ b/src/basic/parse-util.c
@@ -0,0 +1,779 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/oom.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "errno-list.h"
+#include "extract-word.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+
+/* Parses a boolean. "1" and "0" must match exactly, the word forms (yes/y/true/t/on and
+ * no/n/false/f/off) are compared case-insensitively. Returns 1 or 0, or -EINVAL for NULL and for
+ * anything unrecognized. */
+int parse_boolean(const char *v) {
+        static const char *const yes_words[] = { "yes", "y", "true", "t", "on" };
+        static const char *const no_words[] = { "no", "n", "false", "f", "off" };
+        size_t i;
+
+        if (!v)
+                return -EINVAL;
+
+        if (streq(v, "1"))
+                return 1;
+        for (i = 0; i < ELEMENTSOF(yes_words); i++)
+                if (strcaseeq(v, yes_words[i]))
+                        return 1;
+
+        if (streq(v, "0"))
+                return 0;
+        for (i = 0; i < ELEMENTSOF(no_words); i++)
+                if (strcaseeq(v, no_words[i]))
+                        return 0;
+
+        return -EINVAL;
+}
+
+/* Parses a process ID. Returns 0 and stores the PID on success, the error of safe_atolu() if the
+ * string is not a number, and -ERANGE if the number does not fit pid_t or fails pid_is_valid(). */
+int parse_pid(const char *s, pid_t* ret_pid) {
+        unsigned long value = 0;
+        pid_t converted;
+        int r;
+
+        assert(s);
+        assert(ret_pid);
+
+        r = safe_atolu(s, &value);
+        if (r < 0)
+                return r;
+
+        /* the value has to survive the round trip through pid_t unchanged */
+        converted = (pid_t) value;
+        if ((unsigned long) converted != value)
+                return -ERANGE;
+
+        if (!pid_is_valid(converted))
+                return -ERANGE;
+
+        *ret_pid = converted;
+        return 0;
+}
+
+/* Parses an octal file mode. Leading whitespace is skipped and a leading '-' is refused with -ERANGE.
+ * Values outside 0..07777 yield -ERANGE, unparsable or trailing text yields -EINVAL. */
+int parse_mode(const char *s, mode_t *ret) {
+        char *end;
+        long m;
+
+        assert(s);
+        assert(ret);
+
+        s += strspn(s, WHITESPACE);
+        if (*s == '-')
+                return -ERANGE;
+
+        errno = 0;
+        m = strtol(s, &end, 8);
+        if (errno > 0)
+                return -errno;
+
+        /* nothing consumed, or something left over after the number */
+        if (!end || end == s || *end != 0)
+                return -EINVAL;
+
+        if (m < 0 || m > 07777)
+                return -ERANGE;
+
+        *ret = (mode_t) m;
+        return 0;
+}
+
+/* Parses an interface index: an int that must be strictly positive. Zero and negative values are
+ * refused with -EINVAL, parse failures return the error of safe_atoi(). */
+int parse_ifindex(const char *s, int *ret) {
+        int idx = 0;
+        int r;
+
+        r = safe_atoi(s, &idx);
+        if (r < 0)
+                return r;
+
+        if (idx <= 0)
+                return -EINVAL;
+
+        *ret = idx;
+        return 0;
+}
+
+/* Parses an MTU with parse_size() in base 1024. The value has to fit into 32 bits and must not be
+ * below the minimum for the address family: IPV6_MIN_MTU for AF_INET6 (this is 1280), IPV4_MIN_MTU for
+ * all other protocols, including 'unspecified'. Out-of-bounds values yield -ERANGE. */
+int parse_mtu(int family, const char *s, uint32_t *ret) {
+        uint64_t mtu;
+        size_t minimum;
+        int r;
+
+        r = parse_size(s, 1024, &mtu);
+        if (r < 0)
+                return r;
+
+        minimum = family == AF_INET6 ? IPV6_MIN_MTU : IPV4_MIN_MTU;
+
+        if (mtu > UINT32_MAX || mtu < minimum)
+                return -ERANGE;
+
+        *ret = (uint32_t) mtu;
+        return 0;
+}
+
+/* Parses a size specification such as "4K", "1.5G" or "1G 500M" into a plain number.
+ *
+ * t    is the string to parse; it is a sequence of one or more decimal numbers, each optionally with a
+ *      single-digit-or-longer fractional part and an optional suffix (E, P, T, G, M, K, B or none).
+ *      Suffixes have to appear in strictly decreasing order of magnitude.
+ * base selects the meaning of the suffixes and must be 1000 (SI) or 1024 (IEC).
+ * size receives the sum of all components on success.
+ *
+ * Returns 0 on success, -EINVAL on malformed input, -ERANGE on negative numbers or overflow, or the
+ * negated errno of strtoull(). */
+int parse_size(const char *t, uint64_t base, uint64_t *size) {
+
+        /* Soo, sometimes we want to parse IEC binary suffixes, and
+         * sometimes SI decimal suffixes. This function can parse
+         * both. Which one is the right way depends on the
+         * context. Wikipedia suggests that SI is customary for
+         * hardware metrics and network speeds, while IEC is
+         * customary for most data sizes used by software and volatile
+         * (RAM) memory. Hence be careful which one you pick!
+         *
+         * In either case we use just K, M, G as suffix, and not Ki,
+         * Mi, Gi or so (as IEC would suggest). That's because that's
+         * frickin' ugly. But this means you really need to make sure
+         * to document which base you are parsing when you use this
+         * call. */
+
+        struct table {
+                const char *suffix;
+                unsigned long long factor;
+        };
+
+        static const struct table iec[] = {
+                { "E", 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL },
+                { "P", 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL },
+                { "T", 1024ULL*1024ULL*1024ULL*1024ULL },
+                { "G", 1024ULL*1024ULL*1024ULL },
+                { "M", 1024ULL*1024ULL },
+                { "K", 1024ULL },
+                { "B", 1ULL },
+                { "",  1ULL },
+        };
+
+        static const struct table si[] = {
+                { "E", 1000ULL*1000ULL*1000ULL*1000ULL*1000ULL*1000ULL },
+                { "P", 1000ULL*1000ULL*1000ULL*1000ULL*1000ULL },
+                { "T", 1000ULL*1000ULL*1000ULL*1000ULL },
+                { "G", 1000ULL*1000ULL*1000ULL },
+                { "M", 1000ULL*1000ULL },
+                { "K", 1000ULL },
+                { "B", 1ULL },
+                { "",  1ULL },
+        };
+
+        const struct table *table;
+        const char *p;
+        unsigned long long r = 0;
+        unsigned n_entries, start_pos = 0;
+
+        assert(t);
+        assert(IN_SET(base, 1000, 1024));
+        assert(size);
+
+        if (base == 1000) {
+                table = si;
+                n_entries = ELEMENTSOF(si);
+        } else {
+                table = iec;
+                n_entries = ELEMENTSOF(iec);
+        }
+
+        p = t;
+        do {
+                unsigned long long l, tmp, limit;
+                double frac = 0;
+                char *e;
+                unsigned i;
+
+                p += strspn(p, WHITESPACE);
+
+                errno = 0;
+                l = strtoull(p, &e, 10);
+                if (errno > 0)
+                        return -errno;
+                if (e == p)
+                        return -EINVAL;
+                if (*p == '-')
+                        return -ERANGE;
+
+                if (*e == '.') {
+                        e++;
+
+                        /* strtoull() itself would accept space/+/- */
+                        if (*e >= '0' && *e <= '9') {
+                                unsigned long long l2;
+                                char *e2;
+
+                                l2 = strtoull(e, &e2, 10);
+                                if (errno > 0)
+                                        return -errno;
+
+                                /* Ignore failure. E.g. 10.M is valid */
+                                frac = l2;
+                                for (; e < e2; e++)
+                                        frac /= 10;
+                        }
+                }
+
+                e += strspn(e, WHITESPACE);
+
+                /* Only suffixes after the previously matched one are considered, which enforces the
+                 * decreasing order of the components. */
+                for (i = start_pos; i < n_entries; i++)
+                        if (startswith(e, table[i].suffix))
+                                break;
+
+                if (i >= n_entries)
+                        return -EINVAL;
+
+                limit = ULLONG_MAX / table[i].factor;
+
+                /* Refuse if l, rounded up when there is a fractional part, exceeds the limit. This must
+                 * not be written as "l + (frac > 0) > limit": for l == ULLONG_MAX the sum wraps around
+                 * to 0 and the check would wrongly pass. */
+                if (l > limit || (frac > 0 && l == limit))
+                        return -ERANGE;
+
+                tmp = l * table[i].factor + (unsigned long long) (frac * table[i].factor);
+                if (tmp > ULLONG_MAX - r)
+                        return -ERANGE;
+
+                r += tmp;
+                if ((unsigned long long) (uint64_t) r != r)
+                        return -ERANGE;
+
+                p = e + strlen(table[i].suffix);
+
+                start_pos = i + 1;
+
+        } while (*p);
+
+        *size = r;
+
+        return 0;
+}
+
+/* Parses either a single unsigned number ("5") or a range of two joined by a dash ("5-9"). For a
+ * single number both bounds are set to it. A trailing dash without upper bound yields -EINVAL. */
+int parse_range(const char *t, unsigned *lower, unsigned *upper) {
+        _cleanup_free_ char *first = NULL;
+        unsigned lo, hi;
+        int r;
+
+        assert(lower);
+        assert(upper);
+
+        /* Split off everything up to the first dash, that's the lower bound. */
+        r = extract_first_word(&t, &first, "-", EXTRACT_DONT_COALESCE_SEPARATORS);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return -EINVAL;
+
+        r = safe_atou(first, &lo);
+        if (r < 0)
+                return r;
+
+        if (!t)
+                /* Single number with no dashes. */
+                hi = lo;
+        else {
+                /* Trailing dash is an error. */
+                if (!*t)
+                        return -EINVAL;
+
+                r = safe_atou(t, &hi);
+                if (r < 0)
+                        return r;
+        }
+
+        *lower = lo;
+        *upper = hi;
+        return 0;
+}
+
+/* Parses an error number given either by name (via errno_from_name()) or as an integer. Returns the
+ * error number, which may be 0, or a negative value if the string cannot be parsed; -ERANGE if the
+ * integer is neither 0 nor accepted by errno_is_valid(). */
+int parse_errno(const char *t) {
+        int value, r;
+
+        assert(t);
+
+        r = errno_from_name(t);
+        if (r > 0)
+                return r;
+
+        r = safe_atoi(t, &value);
+        if (r < 0)
+                return r;
+
+        /* 0 is also allowed here */
+        if (errno_is_valid(value) || value == 0)
+                return value;
+
+        return -ERANGE;
+}
+
+/* Splits "syscall:errno" strings like "uname:EILSEQ" or "@sync:255" at the first colon.
+ *
+ * On success *name receives a newly allocated copy of the part before the colon (or of the whole
+ * string if there is no colon) and *error the parsed error number, or -1 if it was omitted. An empty
+ * syscall name is refused with -EINVAL. The syscall name itself is not validated here. */
+int parse_syscall_and_errno(const char *in, char **name, int *error) {
+        _cleanup_free_ char *n = NULL;
+        const char *colon;
+        int e = -1;
+
+        assert(in);
+        assert(name);
+        assert(error);
+
+        colon = strchr(in, ':');
+        if (!colon)
+                n = strdup(in);
+        else {
+                e = parse_errno(colon + 1);
+                if (e < 0)
+                        return e;
+
+                n = strndup(in, colon - in);
+        }
+
+        if (!n)
+                return -ENOMEM;
+
+        if (isempty(n))
+                return -EINVAL;
+
+        *error = e;
+        *name = TAKE_PTR(n);
+
+        return 0;
+}
+
+/* Formats t into buf (of size l) using the largest IEC unit that is not bigger than t, with one
+ * decimal digit, e.g. "1.5K". Values below 1024 are written as plain bytes with a "B" suffix.
+ * Returns buf, or NULL if t is (uint64_t) -1. */
+char *format_bytes(char *buf, size_t l, uint64_t t) {
+        unsigned i;
+
+        /* This only does IEC units so far */
+
+        static const struct {
+                const char *suffix;
+                uint64_t factor;
+        } table[] = {
+                { "E", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) },
+                { "P", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) },
+                { "T", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) },
+                { "G", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) },
+                { "M", UINT64_C(1024)*UINT64_C(1024) },
+                { "K", UINT64_C(1024) },
+        };
+
+        if (t == (uint64_t) -1)
+                return NULL;
+
+        for (i = 0; i < ELEMENTSOF(table); i++) {
+                uint64_t whole, tenths;
+
+                if (t < table[i].factor)
+                        continue;
+
+                whole = t / table[i].factor;
+                tenths = ((t*UINT64_C(10)) / table[i].factor) % UINT64_C(10);
+
+                snprintf(buf, l, "%" PRIu64 ".%" PRIu64 "%s", whole, tenths, table[i].suffix);
+                buf[l-1] = 0;
+                return buf;
+        }
+
+        snprintf(buf, l, "%" PRIu64 "B", t);
+        buf[l-1] = 0;
+        return buf;
+}
+
+/* Parses an unsigned int in the given base (at most 16; the base is handed to strtoul() as is).
+ * Returns 0 on success, -EINVAL for malformed input, -ERANGE for negative or too large numbers. */
+int safe_atou_full(const char *s, unsigned base, unsigned *ret_u) {
+        char *end = NULL;
+        unsigned long value;
+
+        assert(s);
+        assert(ret_u);
+        assert(base <= 16);
+
+        /* strtoul() is happy to parse negative values, and silently
+         * converts them to unsigned values without generating an
+         * error. We want a clean error, hence let's look for the "-"
+         * prefix on our own, and generate an error. But let's do so
+         * only after strtoul() validated that the string is clean
+         * otherwise, so that we return EINVAL preferably over
+         * ERANGE. */
+
+        s += strspn(s, WHITESPACE);
+
+        errno = 0;
+        value = strtoul(s, &end, base);
+        if (errno > 0)
+                return -errno;
+        if (!end || end == s || *end != 0)
+                return -EINVAL;
+        if (*s == '-')
+                return -ERANGE;
+
+        /* must fit into an unsigned without loss */
+        if ((unsigned long) (unsigned) value != value)
+                return -ERANGE;
+
+        *ret_u = (unsigned) value;
+        return 0;
+}
+
+/* Parses an int with strtol() in base 0. Returns 0 on success, -EINVAL if nothing could be parsed or
+ * text is left over after the number, -ERANGE if the value does not fit into an int. */
+int safe_atoi(const char *s, int *ret_i) {
+        char *end = NULL;
+        long value;
+
+        assert(s);
+        assert(ret_i);
+
+        errno = 0;
+        value = strtol(s, &end, 0);
+        if (errno > 0)
+                return -errno;
+
+        if (!end || end == s || *end != 0)
+                return -EINVAL;
+
+        /* must survive the narrowing to int unchanged */
+        if ((long) (int) value != value)
+                return -ERANGE;
+
+        *ret_i = (int) value;
+        return 0;
+}
+
+/* Parses an unsigned long long with strtoull() in base 0, after skipping leading whitespace. Returns
+ * 0 on success, -EINVAL for malformed input and -ERANGE for numbers written with a leading '-'. */
+int safe_atollu(const char *s, long long unsigned *ret_llu) {
+        char *end = NULL;
+        unsigned long long value;
+
+        assert(s);
+        assert(ret_llu);
+
+        s += strspn(s, WHITESPACE);
+
+        errno = 0;
+        value = strtoull(s, &end, 0);
+        if (errno > 0)
+                return -errno;
+
+        if (!end || end == s || *end != 0)
+                return -EINVAL;
+
+        /* checked last, so that malformed input is reported as EINVAL first */
+        if (s[0] == '-')
+                return -ERANGE;
+
+        *ret_llu = value;
+        return 0;
+}
+
+/* Parses a long long with strtoll() in base 0. Returns 0 on success, -EINVAL if nothing could be
+ * parsed or text is left over after the number, or the negated errno set by strtoll(). */
+int safe_atolli(const char *s, long long int *ret_lli) {
+        char *end = NULL;
+        long long value;
+
+        assert(s);
+        assert(ret_lli);
+
+        errno = 0;
+        value = strtoll(s, &end, 0);
+        if (errno > 0)
+                return -errno;
+
+        if (!end || end == s || *end != 0)
+                return -EINVAL;
+
+        *ret_lli = value;
+        return 0;
+}
+
+/* Parses a uint8_t with strtoul() in base 0, after skipping leading whitespace. Returns 0 on success,
+ * -EINVAL for malformed input, -ERANGE for negative numbers or values that do not fit into 8 bits. */
+int safe_atou8(const char *s, uint8_t *ret) {
+        char *end = NULL;
+        unsigned long value;
+
+        assert(s);
+        assert(ret);
+
+        s += strspn(s, WHITESPACE);
+
+        errno = 0;
+        value = strtoul(s, &end, 0);
+        if (errno > 0)
+                return -errno;
+
+        if (!end || end == s || *end != 0)
+                return -EINVAL;
+
+        if (*s == '-')
+                return -ERANGE;
+
+        /* must survive the narrowing to uint8_t unchanged */
+        if ((unsigned long) (uint8_t) value != value)
+                return -ERANGE;
+
+        *ret = (uint8_t) value;
+        return 0;
+}
+
+/* Parses a uint16_t in the given base (at most 16; the base is handed to strtoul() as is), after
+ * skipping leading whitespace. Returns 0 on success, -EINVAL for malformed input, -ERANGE for negative
+ * numbers or values that do not fit into 16 bits. */
+int safe_atou16_full(const char *s, unsigned base, uint16_t *ret) {
+        char *end = NULL;
+        unsigned long value;
+
+        assert(s);
+        assert(ret);
+        assert(base <= 16);
+
+        s += strspn(s, WHITESPACE);
+
+        errno = 0;
+        value = strtoul(s, &end, base);
+        if (errno > 0)
+                return -errno;
+
+        if (!end || end == s || *end != 0)
+                return -EINVAL;
+
+        if (*s == '-')
+                return -ERANGE;
+
+        /* must survive the narrowing to uint16_t unchanged */
+        if ((unsigned long) (uint16_t) value != value)
+                return -ERANGE;
+
+        *ret = (uint16_t) value;
+        return 0;
+}
+
+/* Parses an int16_t with strtol() in base 0. Returns 0 on success, -EINVAL if nothing could be parsed
+ * or text is left over after the number, -ERANGE if the value does not fit into 16 bits. */
+int safe_atoi16(const char *s, int16_t *ret) {
+        char *end = NULL;
+        long value;
+
+        assert(s);
+        assert(ret);
+
+        errno = 0;
+        value = strtol(s, &end, 0);
+        if (errno > 0)
+                return -errno;
+
+        if (!end || end == s || *end != 0)
+                return -EINVAL;
+
+        /* must survive the narrowing to int16_t unchanged */
+        if ((long) (int16_t) value != value)
+                return -ERANGE;
+
+        *ret = (int16_t) value;
+        return 0;
+}
+
+/* Parses a double with strtod_l() under a freshly created "C" numeric locale, so that the result does
+ * not depend on the process locale. Returns 0 on success, -EINVAL for malformed input, or the negated
+ * errno of newlocale() or strtod_l(). */
+int safe_atod(const char *s, double *ret_d) {
+        _cleanup_(freelocalep) locale_t c_locale = (locale_t) 0;
+        char *end = NULL;
+        double value = 0;
+
+        assert(s);
+        assert(ret_d);
+
+        c_locale = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0);
+        if (c_locale == (locale_t) 0)
+                return -errno;
+
+        errno = 0;
+        value = strtod_l(s, &end, c_locale);
+        if (errno > 0)
+                return -errno;
+
+        if (!end || end == s || *end != 0)
+                return -EINVAL;
+
+        *ret_d = value;
+        return 0;
+}
+
+/* Reads the digits after a decimal point from *p and scales them to exactly 'digits' decimal places.
+ *
+ * Too few digits are padded with zeroes. If more digits follow, the result is rounded up when the
+ * first surplus digit is 5 or greater, and the remaining digits are skipped. On success *p is advanced
+ * past all digits and *res receives the value. Returns -EINVAL if the text does not begin with a digit
+ * (and 'digits' is not zero). */
+int parse_fractional_part_u(const char **p, size_t digits, unsigned *res) {
+        const char *s = *p;
+        unsigned val = 0;
+        size_t i;
+
+        /* accept any number of digits, strtoull is limited to 19 */
+        for (i = 0; i < digits; i++, s++) {
+                if (*s >= '0' && *s <= '9') {
+                        val = val * 10 + (*s - '0');
+                        continue;
+                }
+
+                if (i == 0)
+                        return -EINVAL;
+
+                /* too few digits, pad with 0 */
+                for (; i < digits; i++)
+                        val *= 10;
+
+                break;
+        }
+
+        /* maybe round up */
+        if (*s >= '5' && *s <= '9')
+                val++;
+
+        s += strspn(s, DIGITS);
+
+        *p = s;
+        *res = val;
+
+        return 0;
+}
+
+/* Parses an integer followed by '%', without upper bound. Returns the non-negative number, -EINVAL if
+ * the '%' suffix is missing, -ERANGE for negative numbers, or the error of safe_atoi(). */
+int parse_percent_unbounded(const char *p) {
+        const char *suffix, *number;
+        int value, r;
+
+        suffix = endswith(p, "%");
+        if (!suffix)
+                return -EINVAL;
+
+        /* stack copy of everything in front of the suffix */
+        number = strndupa(p, suffix - p);
+
+        r = safe_atoi(number, &value);
+        if (r < 0)
+                return r;
+
+        return value < 0 ? -ERANGE : value;
+}
+
+/* Like parse_percent_unbounded(), but additionally refuses values above 100 with -ERANGE. Negative
+ * results (errors) are passed through unchanged. */
+int parse_percent(const char *p) {
+        int v = parse_percent_unbounded(p);
+
+        return v > 100 ? -ERANGE : v;
+}
+
+/* Parses a value in permille, without upper bound. Two notations are accepted: an integer followed by
+ * "‰", or a percentage followed by "%" that may carry exactly one digit after a decimal dot (e.g.
+ * "12.5%" yields 125). Returns the non-negative permille value, -EINVAL for malformed input, -ERANGE
+ * for negative or overflowing numbers. */
+int parse_permille_unbounded(const char *p) {
+        const char *suffix, *dot, *number;
+        int r, tenths, v;
+
+        suffix = endswith(p, "‰");
+        if (suffix) {
+                /* plain permille notation, taken as is */
+                number = strndupa(p, suffix - p);
+                r = safe_atoi(number, &v);
+                if (r < 0)
+                        return r;
+                if (v < 0)
+                        return -ERANGE;
+
+                return v;
+        }
+
+        suffix = endswith(p, "%");
+        if (!suffix)
+                return -EINVAL;
+
+        dot = memchr(p, '.', suffix - p);
+        if (!dot) {
+                tenths = 0;
+                number = strndupa(p, suffix - p);
+        } else {
+                /* exactly one digit is permitted between the dot and the suffix */
+                if (dot + 2 != suffix)
+                        return -EINVAL;
+                if (dot[1] < '0' || dot[1] > '9')
+                        return -EINVAL;
+
+                tenths = dot[1] - '0';
+                number = strndupa(p, dot - p);
+        }
+
+        r = safe_atoi(number, &v);
+        if (r < 0)
+                return r;
+        if (v < 0)
+                return -ERANGE;
+        if (v > (INT_MAX - tenths) / 10)
+                return -ERANGE;
+
+        return v * 10 + tenths;
+}
+
+/* Like parse_permille_unbounded(), but additionally refuses values above 1000 with -ERANGE. Negative
+ * results (errors) are passed through unchanged. */
+int parse_permille(const char *p) {
+        int v = parse_permille_unbounded(p);
+
+        return v > 1000 ? -ERANGE : v;
+}
+
+/* Parses a nice level. Returns 0 on success, the error of safe_atoi() if the string is not a number,
+ * and -ERANGE if nice_is_valid() rejects the number. */
+int parse_nice(const char *p, int *ret) {
+        int level, r;
+
+        r = safe_atoi(p, &level);
+        if (r < 0)
+                return r;
+
+        if (nice_is_valid(level)) {
+                *ret = level;
+                return 0;
+        }
+
+        return -ERANGE;
+}
+
+/* Parses an IP port number: a 16 bit unsigned number that must not be 0. Returns 0 on success,
+ * -EINVAL for port 0, or the error of safe_atou16(). */
+int parse_ip_port(const char *s, uint16_t *ret) {
+        uint16_t port;
+        int r;
+
+        r = safe_atou16(s, &port);
+        if (r < 0)
+                return r;
+
+        if (port == 0)
+                return -EINVAL;
+
+        *ret = port;
+        return 0;
+}
+
+/* Parses a port or a port range in the syntax of parse_range(). Both bounds must lie within 1..65535
+ * and the upper bound must not be below the lower one, otherwise -EINVAL is returned. */
+int parse_ip_port_range(const char *s, uint16_t *low, uint16_t *high) {
+        unsigned lo, hi;
+        int r;
+
+        r = parse_range(s, &lo, &hi);
+        if (r < 0)
+                return r;
+
+        if (lo == 0 || lo > 65535)
+                return -EINVAL;
+        if (hi == 0 || hi > 65535)
+                return -EINVAL;
+        if (hi < lo)
+                return -EINVAL;
+
+        *low = lo;
+        *high = hi;
+
+        return 0;
+}
+
+/* Parses a device number in "MAJOR:MINOR" notation. The major part must consist of digits only and be
+ * followed by a colon, otherwise -EINVAL is returned. Numbers rejected by DEVICE_MAJOR_VALID() or
+ * DEVICE_MINOR_VALID() yield -ERANGE. */
+int parse_dev(const char *s, dev_t *ret) {
+        const char *major_str;
+        unsigned maj, min;
+        size_t len;
+        int r;
+
+        len = strspn(s, DIGITS);
+        if (len == 0 || s[len] != ':')
+                return -EINVAL;
+
+        /* stack copy of the digits in front of the colon */
+        major_str = strndupa(s, len);
+        r = safe_atou(major_str, &maj);
+        if (r < 0)
+                return r;
+
+        r = safe_atou(s + len + 1, &min);
+        if (r < 0)
+                return r;
+
+        if (!DEVICE_MAJOR_VALID(maj) || !DEVICE_MINOR_VALID(min))
+                return -ERANGE;
+
+        *ret = makedev(maj, min);
+        return 0;
+}
+
+/* Parses an OOM score adjustment value, which has to lie within OOM_SCORE_ADJ_MIN..OOM_SCORE_ADJ_MAX.
+ * Returns 0 on success, -ERANGE if out of bounds, or the error of safe_atoi(). */
+int parse_oom_score_adjust(const char *s, int *ret) {
+        int adj, r;
+
+        assert(s);
+        assert(ret);
+
+        r = safe_atoi(s, &adj);
+        if (r < 0)
+                return r;
+
+        if (adj >= OOM_SCORE_ADJ_MIN && adj <= OOM_SCORE_ADJ_MAX) {
+                *ret = adj;
+                return 0;
+        }
+
+        return -ERANGE;
+}
diff --git a/src/basic/parse-util.h b/src/basic/parse-util.h
new file mode 100644
index 0000000..e47641b
--- /dev/null
+++ b/src/basic/parse-util.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+/* Sentinel mode_t value with all bits set. Presumably used to mark "no mode set"; parse_mode() itself
+ * only ever returns values up to 07777. */
+#define MODE_INVALID ((mode_t) -1)
+
+/* Returns 1 or 0, or -EINVAL */
+int parse_boolean(const char *v) _pure_;
+/* The functions below return 0 on success and a negative errno-style value on failure */
+int parse_dev(const char *s, dev_t *ret);
+int parse_pid(const char *s, pid_t* ret_pid);
+int parse_mode(const char *s, mode_t *ret);
+int parse_ifindex(const char *s, int *ret);
+int parse_mtu(int family, const char *s, uint32_t *ret);
+
+/* base must be 1000 or 1024 */
+int parse_size(const char *t, uint64_t base, uint64_t *size);
+int parse_range(const char *t, unsigned *lower, unsigned *upper);
+/* Returns the error number itself (>= 0), or a negative errno-style value on failure */
+int parse_errno(const char *t);
+int parse_syscall_and_errno(const char *in, char **name, int *error);
+
+/* Presumably the buffer size callers should pass to format_bytes() -- confirm against callers */
+#define FORMAT_BYTES_MAX 8
+char *format_bytes(char *buf, size_t l, uint64_t t);
+
+int safe_atou_full(const char *s, unsigned base, unsigned *ret_u);
+
+/* Parses an unsigned int. The base is passed as 0, which makes strtoul() derive it from the prefix of
+ * the string. */
+static inline int safe_atou(const char *s, unsigned *ret_u) {
+        const unsigned base = 0;
+
+        return safe_atou_full(s, base, ret_u);
+}
+
+/* Strict number parsers: return 0 on success, -EINVAL if the string is not entirely a number, and
+ * -ERANGE (or the negated errno of the strto*() call) if the value does not fit the target type. */
+int safe_atoi(const char *s, int *ret_i);
+int safe_atollu(const char *s, unsigned long long *ret_u);
+int safe_atolli(const char *s, long long int *ret_i);
+
+int safe_atou8(const char *s, uint8_t *ret);
+
+int safe_atou16_full(const char *s, unsigned base, uint16_t *ret);
+
+/* Parses a uint16_t. The base is passed as 0, which makes strtoul() derive it from the prefix of the
+ * string. */
+static inline int safe_atou16(const char *s, uint16_t *ret) {
+        const unsigned base = 0;
+
+        return safe_atou16_full(s, base, ret);
+}
+
+/* Parses a uint16_t written in hexadecimal, i.e. with base 16. */
+static inline int safe_atoux16(const char *s, uint16_t *ret) {
+        const unsigned base = 16;
+
+        return safe_atou16_full(s, base, ret);
+}
+
+/* Returns 0 on success, -EINVAL for malformed input, -ERANGE if the value does not fit into 16 bits */
+int safe_atoi16(const char *s, int16_t *ret);
+
+/* Parses a uint32_t through safe_atou(); the compile-time check guarantees both types have one size. */
+static inline int safe_atou32(const char *s, uint32_t *ret_u) {
+        unsigned *out = (unsigned*) ret_u;
+
+        assert_cc(sizeof(uint32_t) == sizeof(unsigned));
+        return safe_atou(s, out);
+}
+
+/* Parses an int32_t through safe_atoi(); the compile-time check guarantees both types have one size. */
+static inline int safe_atoi32(const char *s, int32_t *ret_i) {
+        int *out = (int*) ret_i;
+
+        assert_cc(sizeof(int32_t) == sizeof(int));
+        return safe_atoi(s, out);
+}
+
+/* Parses a uint64_t through safe_atollu(); the compile-time check guarantees both types have one
+ * size. */
+static inline int safe_atou64(const char *s, uint64_t *ret_u) {
+        unsigned long long *out = (unsigned long long*) ret_u;
+
+        assert_cc(sizeof(uint64_t) == sizeof(unsigned long long));
+        return safe_atollu(s, out);
+}
+
+/* Parses an int64_t through safe_atolli(); the compile-time check guarantees both types have one
+ * size. */
+static inline int safe_atoi64(const char *s, int64_t *ret_i) {
+        long long int *out = (long long int*) ret_i;
+
+        assert_cc(sizeof(int64_t) == sizeof(long long int));
+        return safe_atolli(s, out);
+}
+
+/* Parsers for (unsigned) long. Depending on whether long is as wide as int on this platform, they
+ * forward to the int or to the long long parsers; assert_cc() double-checks the size assumption. */
+#if LONG_MAX == INT_MAX
+static inline int safe_atolu(const char *s, unsigned long *ret_u) {
+ assert_cc(sizeof(unsigned long) == sizeof(unsigned));
+ return safe_atou(s, (unsigned*) ret_u);
+}
+static inline int safe_atoli(const char *s, long int *ret_u) {
+ assert_cc(sizeof(long int) == sizeof(int));
+ return safe_atoi(s, (int*) ret_u);
+}
+#else
+static inline int safe_atolu(const char *s, unsigned long *ret_u) {
+ assert_cc(sizeof(unsigned long) == sizeof(unsigned long long));
+ return safe_atollu(s, (unsigned long long*) ret_u);
+}
+static inline int safe_atoli(const char *s, long int *ret_u) {
+ assert_cc(sizeof(long int) == sizeof(long long int));
+ return safe_atolli(s, (long long int*) ret_u);
+}
+#endif
+
+/* Parser for size_t. Depending on whether size_t is as wide as unsigned on this platform, it forwards
+ * to the unsigned or to the unsigned long parser; assert_cc() double-checks the size assumption. */
+#if SIZE_MAX == UINT_MAX
+static inline int safe_atozu(const char *s, size_t *ret_u) {
+ assert_cc(sizeof(size_t) == sizeof(unsigned));
+ return safe_atou(s, (unsigned *) ret_u);
+}
+#else
+static inline int safe_atozu(const char *s, size_t *ret_u) {
+ assert_cc(sizeof(size_t) == sizeof(long unsigned));
+ return safe_atolu(s, ret_u);
+}
+#endif
+
+int safe_atod(const char *s, double *ret_d);
+
+/* Advances *s past the digits it consumed */
+int parse_fractional_part_u(const char **s, size_t digits, unsigned *res);
+
+/* Both return the parsed value (>= 0) or a negative errno-style value; the bounded variant refuses
+ * values above 100 */
+int parse_percent_unbounded(const char *p);
+int parse_percent(const char *p);
+
+/* Both return the parsed value (>= 0) or a negative errno-style value; the bounded variant refuses
+ * values above 1000 */
+int parse_permille_unbounded(const char *p);
+int parse_permille(const char *p);
+
+int parse_nice(const char *p, int *ret);
+
+/* Port 0 is refused */
+int parse_ip_port(const char *s, uint16_t *ret);
+int parse_ip_port_range(const char *s, uint16_t *low, uint16_t *high);
+
+int parse_oom_score_adjust(const char *s, int *ret);
diff --git a/src/basic/path-util.c b/src/basic/path-util.c
new file mode 100644
index 0000000..2215173
--- /dev/null
+++ b/src/basic/path-util.c
@@ -0,0 +1,1150 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/* When we include libgen.h because we need dirname() we immediately
+ * undefine basename() since libgen.h defines it as a macro to the
+ * POSIX version which is really broken. We prefer GNU basename(). */
+#include <libgen.h>
+#undef basename
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "utf8.h"
+
+/* Returns true if p begins with a slash. p is dereferenced unconditionally and hence must
+ * not be NULL. */
+bool path_is_absolute(const char *p) {
+        const char first = p[0];
+
+        return first == '/';
+}
+
+/* Returns true if p contains at least one slash anywhere, i.e. if it is more than a plain
+ * file name. p must not be NULL. */
+bool is_path(const char *p) {
+        return strchr(p, '/') != NULL;
+}
+
+/* Splits the colon-separated list p into a newly allocated string vector and makes every
+ * entry absolute via path_strv_make_absolute_cwd(). On success the vector is stored in *ret
+ * and the caller owns it. Returns -ENOMEM if splitting fails, or the negative error returned
+ * by path_strv_make_absolute_cwd(), in which case the vector is freed and *ret is untouched. */
+int path_split_and_make_absolute(const char *p, char ***ret) {
+        char **paths;
+        int r;
+
+        assert(p);
+        assert(ret);
+
+        paths = strv_split(p, ":");
+        if (!paths)
+                return -ENOMEM;
+
+        r = path_strv_make_absolute_cwd(paths);
+        if (r >= 0) {
+                *ret = paths;
+                return r;
+        }
+
+        strv_free(paths);
+        return r;
+}
+
+/* Returns a newly allocated copy of p, made absolute by prepending prefix when p is relative
+ * and a non-empty prefix was given. Otherwise p is duplicated unchanged. Returns NULL if the
+ * allocation fails. */
+char *path_make_absolute(const char *p, const char *prefix) {
+        assert(p);
+
+        /* Nothing to prepend: already absolute, or no prefix to use. */
+        if (path_is_absolute(p) || isempty(prefix))
+                return strdup(p);
+
+        /* Only insert a separator if the prefix doesn't already end in one. */
+        if (!endswith(prefix, "/"))
+                return strjoin(prefix, "/", p);
+
+        return strjoin(prefix, p);
+}
+
+/* Stores the current working directory, as a newly allocated string, in *ret. Returns 0 on
+ * success, the negative errno if get_current_dir_name() fails, and -ENOMEDIUM if the reported
+ * directory is not an absolute path. */
+int safe_getcwd(char **ret) {
+        char *dir = get_current_dir_name();
+
+        if (!dir)
+                return negative_errno();
+
+        if (dir[0] == '/') {
+                *ret = dir;
+                return 0;
+        }
+
+        /* A non-absolute result must not be trusted, to protect us from the logic behind
+         * CVE-2018-1000001. */
+        free(dir);
+        return -ENOMEDIUM;
+}
+
+/* Like path_make_absolute(), but uses the current working directory as prefix. On success
+ * stores a newly allocated absolute path in *ret and returns 0. Returns the error of
+ * safe_getcwd() if the working directory cannot be determined, or -ENOMEM if allocation
+ * fails. */
+int path_make_absolute_cwd(const char *p, char **ret) {
+        _cleanup_free_ char *cwd = NULL;
+        char *result;
+        int r;
+
+        assert(p);
+        assert(ret);
+
+        if (!path_is_absolute(p)) {
+                r = safe_getcwd(&cwd);
+                if (r < 0)
+                        return r;
+
+                result = path_join(cwd, p);
+        } else
+                result = strdup(p);
+
+        if (!result)
+                return -ENOMEM;
+
+        *ret = result;
+        return 0;
+}
+
+/* Computes the relative path that leads from directory from_dir to to_path and stores it, as a
+ * newly allocated string, in *_r.
+ *
+ * Both inputs must be absolute, otherwise -EINVAL is returned. -EINVAL is also returned if the
+ * part of from_dir that remains after the common prefix contains a ".." component. Returns
+ * -ENOMEM on allocation failure and 0 on success. */
+int path_make_relative(const char *from_dir, const char *to_path, char **_r) {
+ char *f, *t, *r, *p;
+ unsigned n_parents = 0;
+
+ assert(from_dir);
+ assert(to_path);
+ assert(_r);
+
+ /* Strips the common part, and adds ".." elements as necessary. */
+
+ if (!path_is_absolute(from_dir) || !path_is_absolute(to_path))
+ return -EINVAL;
+
+ /* Work on simplified stack copies, so that the inputs are left untouched. */
+ f = strdupa(from_dir);
+ t = strdupa(to_path);
+
+ path_simplify(f, true);
+ path_simplify(t, true);
+
+ /* Skip the common part. */
+ for (;;) {
+ size_t a, b;
+
+ /* Step over at most one separator in each path. */
+ f += *f == '/';
+ t += *t == '/';
+
+ if (!*f) {
+ if (!*t)
+ /* from_dir equals to_path. */
+ r = strdup(".");
+ else
+ /* from_dir is a parent directory of to_path. */
+ r = strdup(t);
+ if (!r)
+ return -ENOMEM;
+
+ *_r = r;
+ return 0;
+ }
+
+ if (!*t)
+ break;
+
+ /* Lengths of the next component of each path. */
+ a = strcspn(f, "/");
+ b = strcspn(t, "/");
+
+ if (a != b || memcmp(f, t, a) != 0)
+ break;
+
+ f += a;
+ t += b;
+ }
+
+ /* If we're here, then "from_dir" has one or more elements that need to
+ * be replaced with "..". */
+
+ /* Count the number of necessary ".." elements. */
+ for (; *f;) {
+ size_t w;
+
+ w = strcspn(f, "/");
+
+ /* If this includes ".." we can't do a simple series of "..", refuse */
+ if (w == 2 && f[0] == '.' && f[1] == '.')
+ return -EINVAL;
+
+ /* Count number of elements */
+ n_parents++;
+
+ f += w;
+ f += *f == '/';
+ }
+
+ /* Three bytes ("../") per parent, plus the remainder of to_path, plus the trailing NUL. */
+ r = new(char, n_parents * 3 + strlen(t) + 1);
+ if (!r)
+ return -ENOMEM;
+
+ for (p = r; n_parents > 0; n_parents--)
+ p = mempcpy(p, "../", 3);
+
+ if (*t)
+ strcpy(p, t);
+ else
+ /* Remove trailing slash */
+ *(--p) = 0;
+
+ *_r = r;
+ return 0;
+}
+
+/* Replaces every entry of the string vector l by its absolute, simplified form, using the
+ * current working directory for relative entries. Operates in place. If an entry fails, the
+ * error is returned immediately and entries converted so far stay converted. Returns 0 on
+ * success. */
+int path_strv_make_absolute_cwd(char **l) {
+        char **entry;
+
+        STRV_FOREACH(entry, l) {
+                char *absolute;
+                int r;
+
+                r = path_make_absolute_cwd(*entry, &absolute);
+                if (r < 0)
+                        return r;
+
+                path_simplify(absolute, false);
+                free_and_replace(*entry, absolute);
+        }
+
+        return 0;
+}
+
+/* Canonicalizes every entry of the string vector l in place, optionally relative to root.
+ *
+ * Relative entries are freed and dropped. Entries for which chase_symlinks() returns -ENOENT
+ * are kept as they were. Entries failing with any other error are dropped. The vector is
+ * compacted as entries are dropped, and is always NULL-terminated again at the end.
+ *
+ * Returns l, or NULL if an out-of-memory condition was hit along the way. */
+char **path_strv_resolve(char **l, const char *root) {
+ char **s;
+ unsigned k = 0;
+ bool enomem = false;
+ int r;
+
+ if (strv_isempty(l))
+ return l;
+
+ /* Goes through every item in the string list and canonicalize
+ * the path. This works in place and won't rollback any
+ * changes on failure. */
+
+ STRV_FOREACH(s, l) {
+ _cleanup_free_ char *orig = NULL;
+ char *t, *u;
+
+ if (!path_is_absolute(*s)) {
+ free(*s);
+ continue;
+ }
+
+ if (root) {
+ /* Keep the original entry around in "orig", and resolve the root-prefixed copy "t". */
+ orig = *s;
+ t = prefix_root(root, orig);
+ if (!t) {
+ enomem = true;
+ continue;
+ }
+ } else
+ t = *s;
+
+ r = chase_symlinks(t, root, 0, &u);
+ if (r == -ENOENT) {
+ /* Path does not exist: keep the entry as it was passed in. */
+ if (root) {
+ u = TAKE_PTR(orig);
+ free(t);
+ } else
+ u = t;
+ } else if (r < 0) {
+ free(t);
+
+ if (r == -ENOMEM)
+ enomem = true;
+
+ continue;
+ } else if (root) {
+ char *x;
+
+ /* Resolved below a root: strip the root prefix off the result again. */
+ free(t);
+ x = path_startswith(u, root);
+ if (x) {
+ /* restore the slash if it was lost */
+ if (!startswith(x, "/"))
+ *(--x) = '/';
+
+ t = strdup(x);
+ free(u);
+ if (!t) {
+ enomem = true;
+ continue;
+ }
+ u = t;
+ } else {
+ /* canonicalized path goes outside of
+ * prefix, keep the original path instead */
+ free_and_replace(u, orig);
+ }
+ } else
+ free(t);
+
+ /* k never exceeds the index of the entry being processed, so this compacts in place. */
+ l[k++] = u;
+ }
+
+ l[k] = NULL;
+
+ if (enomem)
+ return NULL;
+
+ return l;
+}
+
+/* Resolves every entry of l via path_strv_resolve() and then removes duplicates with
+ * strv_uniq(). An empty or NULL vector is returned as is. Returns NULL if resolving
+ * fails. */
+char **path_strv_resolve_uniq(char **l, const char *root) {
+        char **resolved;
+
+        if (strv_isempty(l))
+                return l;
+
+        resolved = path_strv_resolve(l, root);
+        if (!resolved)
+                return NULL;
+
+        return strv_uniq(l);
+}
+
+/* Simplifies path in place and returns it. The result is never longer than the input, since
+ * characters are only ever copied from the read position f to the write position t, with
+ * t <= f. An empty string is returned unmodified. */
+char *path_simplify(char *path, bool kill_dots) {
+ char *f, *t;
+ bool slash = false, ignore_slash = false, absolute;
+
+ assert(path);
+
+ /* Removes redundant inner and trailing slashes. Also removes unnecessary dots
+ * if kill_dots is true. Modifies the passed string in-place.
+ *
+ * ///foo//./bar/. becomes /foo/./bar/. (if kill_dots is false)
+ * ///foo//./bar/. becomes /foo/bar (if kill_dots is true)
+ * .//./foo//./bar/. becomes ././foo/./bar/. (if kill_dots is false)
+ * .//./foo//./bar/. becomes foo/bar (if kill_dots is true)
+ */
+
+ if (isempty(path))
+ return path;
+
+ absolute = path_is_absolute(path);
+
+ f = path;
+ /* A leading "." component is dropped, and so is the slash that follows it. */
+ if (kill_dots && *f == '.' && IN_SET(f[1], 0, '/')) {
+ ignore_slash = true;
+ f++;
+ }
+
+ for (t = path; *f; f++) {
+
+ /* Slashes are not copied right away; we only remember that we saw one (or more). */
+ if (*f == '/') {
+ slash = true;
+ continue;
+ }
+
+ if (slash) {
+ /* A "." component following a slash is skipped; the pending slash stays pending. */
+ if (kill_dots && *f == '.' && IN_SET(f[1], 0, '/'))
+ continue;
+
+ slash = false;
+ if (ignore_slash)
+ ignore_slash = false;
+ else
+ *(t++) = '/';
+ }
+
+ *(t++) = *f;
+ }
+
+ /* Special rule, if we stripped everything, we either need a "/" (for the root directory)
+ * or "." for the current directory */
+ if (t == path) {
+ if (absolute)
+ *(t++) = '/';
+ else
+ *(t++) = '.';
+ }
+
+ *t = 0;
+ return path;
+}
+
+/* Component-wise prefix match.
+ *
+ * If both paths are absolute or both are relative, and every component of prefix equals the
+ * component of path at the same position, returns a pointer into path at the first component
+ * after the matched part. That is an empty string if the two are equivalent. Runs of slashes
+ * count as a single separator. Returns NULL in all other cases. */
+char* path_startswith(const char *path, const char *prefix) {
+        assert(path);
+        assert(prefix);
+
+        if ((path[0] == '/') != (prefix[0] == '/'))
+                return NULL;
+
+        for (;;) {
+                size_t path_len, prefix_len;
+
+                /* Skip any separators in front of the next component. */
+                path += strspn(path, "/");
+                prefix += strspn(prefix, "/");
+
+                /* Prefix exhausted: everything matched. */
+                if (!*prefix)
+                        return (char*) path;
+
+                /* Path exhausted first: prefix is longer than path. */
+                if (!*path)
+                        return NULL;
+
+                path_len = strcspn(path, "/");
+                prefix_len = strcspn(prefix, "/");
+
+                if (path_len != prefix_len || memcmp(path, prefix, path_len) != 0)
+                        return NULL;
+
+                path += path_len;
+                prefix += prefix_len;
+        }
+}
+
+/* Three-way, component-wise comparison of two paths. Returns a negative value, 0 or a
+ * positive value (always one of -1, 0, 1) if a sorts before, equal to or after b. Runs of
+ * slashes are treated as a single separator, hence "/foo//bar" and "/foo/bar/" compare as
+ * equal. */
+int path_compare(const char *a, const char *b) {
+ int d;
+
+ assert(a);
+ assert(b);
+
+ /* A relative path and an absolute path must not compare as equal.
+ * Which one is sorted before the other does not really matter.
+ * Here a relative path is ordered before an absolute path. */
+ d = (a[0] == '/') - (b[0] == '/');
+ if (d != 0)
+ return d;
+
+ for (;;) {
+ size_t j, k;
+
+ /* Skip the separators in front of the next component of each path. */
+ a += strspn(a, "/");
+ b += strspn(b, "/");
+
+ if (*a == 0 && *b == 0)
+ return 0;
+
+ /* Order prefixes first: "/foo" before "/foo/bar" */
+ if (*a == 0)
+ return -1;
+ if (*b == 0)
+ return 1;
+
+ /* Lengths of the current component of each path. */
+ j = strcspn(a, "/");
+ k = strcspn(b, "/");
+
+ /* Alphabetical sort: "/foo/aaa" before "/foo/b" */
+ d = memcmp(a, b, MIN(j, k));
+ if (d != 0)
+ return (d > 0) - (d < 0); /* sign of d */
+
+ /* Sort "/foo/a" before "/foo/aaa" */
+ d = (j > k) - (j < k); /* sign of (j - k) */
+ if (d != 0)
+ return d;
+
+ a += j;
+ b += k;
+ }
+}
+
+/* Returns true if a and b are equal as determined by path_compare(). */
+bool path_equal(const char *a, const char *b) {
+        int order = path_compare(a, b);
+
+        return order == 0;
+}
+
+/* Returns true if a and b are equal according to path_equal(), or if files_same() returns a
+ * positive value for them. files_same() is only consulted if the textual comparison fails,
+ * and errors it returns are treated as "not the same". */
+bool path_equal_or_files_same(const char *a, const char *b, int flags) {
+        if (path_equal(a, b))
+                return true;
+
+        return files_same(a, b, flags) > 0;
+}
+
+/* Backend of the path_join() macro: joins a variable number of strings into one newly allocated
+ * path. The argument list must be terminated by (const char*) -1. NULL or empty arguments are
+ * skipped. Returns NULL on allocation failure. */
+char* path_join_internal(const char *first, ...) {
+ char *joined, *q;
+ const char *p;
+ va_list ap;
+ bool slash;
+ size_t sz;
+
+ /* Joins all listed strings until the sentinel and places a "/" between them unless the strings end/begin
+ * already with one so that it is unnecessary. Note that slashes which are already duplicate won't be
+ * removed. The string returned is hence always equal to or longer than the sum of the lengths of each
+ * individual string.
+ *
+ * Note: any listed empty string is simply skipped. This can be useful for concatenating strings of which some
+ * are optional.
+ *
+ * Examples:
+ *
+ * path_join("foo", "bar") → "foo/bar"
+ * path_join("foo/", "bar") → "foo/bar"
+ * path_join("", "foo", "", "bar", "") → "foo/bar" */
+
+ /* First pass: compute an upper bound of the size needed, reserving one byte for a separator per item. */
+ sz = strlen_ptr(first);
+ va_start(ap, first);
+ while ((p = va_arg(ap, char*)) != (const char*) -1)
+ if (!isempty(p))
+ sz += 1 + strlen(p);
+ va_end(ap);
+
+ joined = new(char, sz + 1);
+ if (!joined)
+ return NULL;
+
+ if (!isempty(first)) {
+ q = stpcpy(joined, first);
+ slash = endswith(first, "/");
+ } else {
+ /* Skip empty items */
+ joined[0] = 0;
+ q = joined;
+ slash = true; /* no need to generate a slash anymore */
+ }
+
+ /* Second pass: append the items, tracking in "slash" whether the output currently ends in a separator. */
+ va_start(ap, first);
+ while ((p = va_arg(ap, char*)) != (const char*) -1) {
+ if (isempty(p))
+ continue;
+
+ if (!slash && p[0] != '/')
+ *(q++) = '/';
+
+ q = stpcpy(q, p);
+ slash = endswith(p, "/");
+ }
+ va_end(ap);
+
+ return joined;
+}
+
+/* Locates the executable called name.
+ *
+ * If name contains a slash it is checked directly with access(X_OK). Otherwise each absolute
+ * entry of $PATH (or DEFAULT_PATH if $PATH is unset) is tried in order. On success returns 0
+ * and, if ret is non-NULL, stores a newly allocated absolute path there. On failure returns a
+ * negative errno: the error of the last access() attempt, or -ENOENT if nothing was tried. */
+int find_binary(const char *name, char **ret) {
+ int last_error, r;
+ const char *p;
+
+ assert(name);
+
+ if (is_path(name)) {
+ if (access(name, X_OK) < 0)
+ return -errno;
+
+ if (ret) {
+ r = path_make_absolute_cwd(name, ret);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+ }
+
+ /**
+ * Plain getenv, not secure_getenv, because we want
+ * to actually allow the user to pick the binary.
+ */
+ p = getenv("PATH");
+ if (!p)
+ p = DEFAULT_PATH;
+
+ last_error = -ENOENT;
+
+ for (;;) {
+ _cleanup_free_ char *j = NULL, *element = NULL;
+
+ r = extract_first_word(&p, &element, ":", EXTRACT_RELAX|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ /* Relative search path entries are ignored. */
+ if (!path_is_absolute(element))
+ continue;
+
+ j = strjoin(element, "/", name);
+ if (!j)
+ return -ENOMEM;
+
+ if (access(j, X_OK) >= 0) {
+ /* Found it! */
+
+ if (ret) {
+ /* Hand ownership of j to the caller; resetting j keeps the cleanup handler from freeing it. */
+ *ret = path_simplify(j, false);
+ j = NULL;
+ }
+
+ return 0;
+ }
+
+ last_error = -errno;
+ }
+
+ return last_error;
+}
+
+/* Checks whether any of the listed paths has a modification time newer than *timestamp.
+ *
+ * If update is false, returns true as soon as the first newer path is found. If update is
+ * true, all paths are visited, *timestamp is raised to the newest modification time seen,
+ * and true is returned if it was raised at all. Paths that cannot be stat()ed are skipped.
+ * A NULL list yields false. */
+bool paths_check_timestamp(const char* const* paths, usec_t *timestamp, bool update) {
+        const char* const* path;
+        bool changed = false;
+
+        assert(timestamp);
+
+        if (!paths)
+                return false;
+
+        STRV_FOREACH(path, paths) {
+                struct stat st;
+                usec_t mtime;
+
+                if (stat(*path, &st) < 0)
+                        continue;
+
+                mtime = timespec_load(&st.st_mtim);
+
+                /* Not newer than what we already know about. */
+                if (mtime <= *timestamp)
+                        continue;
+
+                log_debug("timestamp of '%s' changed", *path);
+
+                if (!update)
+                        return true;
+
+                *timestamp = mtime;
+                changed = true;
+        }
+
+        return changed;
+}
+
+/* Checks whether the named binary is available and is not just a placeholder.
+ *
+ * Returns 0 if find_binary() reports -ENOENT, or if the binary is a symlink whose target is one
+ * of the well-known no-op targets listed below. Returns 1 if the binary exists and is either
+ * not a symlink or a symlink to something else. Returns a negative errno on any other
+ * failure. */
+static int binary_is_good(const char *binary) {
+        _cleanup_free_ char *p = NULL, *d = NULL;
+        int r;
+
+        r = find_binary(binary, &p);
+        if (r == -ENOENT)
+                return 0;
+        if (r < 0)
+                return r;
+
+        /* An fsck that is linked to /bin/true is a non-existent
+         * fsck */
+
+        r = readlink_malloc(p, &d);
+        if (r == -EINVAL) /* not a symlink */
+                return 1;
+        if (r < 0)
+                return r;
+
+        /* Each target must be its own list item: without the comma after "true" the adjacent
+         * literals would be concatenated into the single string "true/bin/true". */
+        return !PATH_IN_SET(d, "true",
+                               "/bin/true",
+                               "/usr/bin/true",
+                               "/dev/null");
+}
+
+/* Checks whether a usable "fsck.<fstype>" binary is available, as determined by
+ * binary_is_good(). Returns -EINVAL for the pseudo file system type "auto". */
+int fsck_exists(const char *fstype) {
+        const char *binary;
+
+        assert(fstype);
+
+        if (!streq(fstype, "auto")) {
+                binary = strjoina("fsck.", fstype);
+                return binary_is_good(binary);
+        }
+
+        return -EINVAL;
+}
+
+/* Checks whether a usable "mkfs.<fstype>" binary is available, as determined by
+ * binary_is_good(). Returns -EINVAL for the pseudo file system type "auto". */
+int mkfs_exists(const char *fstype) {
+        const char *binary;
+
+        assert(fstype);
+
+        if (!streq(fstype, "auto")) {
+                binary = strjoina("mkfs.", fstype);
+                return binary_is_good(binary);
+        }
+
+        return -EINVAL;
+}
+
+/* Returns a newly allocated string consisting of path prefixed with root. Duplicate leading
+ * slashes of path and trailing slashes of root are dropped, so that exactly one slash
+ * separates the two. If root is NULL, empty or only slashes, a copy of path (minus duplicate
+ * leading slashes) is returned. Returns NULL on allocation failure. */
+char *prefix_root(const char *root, const char *path) {
+        char *result, *end;
+
+        assert(path);
+
+        /* Collapse duplicate leading slashes of the path into a single one. */
+        while (path[0] == '/' && path[1] == '/')
+                path++;
+
+        if (empty_or_root(root))
+                return strdup(path);
+
+        /* Room for root, one separator, path and the trailing NUL. */
+        result = new(char, strlen(root) + 1 + strlen(path) + 1);
+        if (!result)
+                return NULL;
+
+        end = stpcpy(result, root);
+
+        /* Drop trailing slashes of the root. */
+        while (end > result && end[-1] == '/')
+                end--;
+
+        if (path[0] != '/')
+                *(end++) = '/';
+
+        strcpy(end, path);
+        return result;
+}
+
+/* Parses a path given on the command line into *arg. Returns 0 on success, or the negative
+ * error returned by log_error_errno() if the path cannot be made absolute. */
+int parse_path_argument_and_warn(const char *path, bool suppress_root, char **arg) {
+ char *p;
+ int r;
+
+ /*
+ * This function is intended to be used in command line
+ * parsers, to handle paths that are passed in. It makes the
+ * path absolute, and reduces it to NULL if omitted or
+ * root (the latter optionally).
+ *
+ * NOTE THAT THIS WILL FREE THE PREVIOUS ARGUMENT POINTER ON
+ * SUCCESS! Hence, do not pass in uninitialized pointers.
+ */
+
+ /* An omitted path resets the argument. */
+ if (isempty(path)) {
+ *arg = mfree(*arg);
+ return 0;
+ }
+
+ r = path_make_absolute_cwd(path, &p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse path \"%s\" and make it absolute: %m", path);
+
+ path_simplify(p, false);
+ /* With suppress_root, a path that is just the root directory is stored as NULL. */
+ if (suppress_root && empty_or_root(p))
+ p = mfree(p);
+
+ free_and_replace(*arg, p);
+
+ return 0;
+}
+
+/* Returns the directory part of path, as determined by dirname(), in a newly allocated string
+ * the caller must free. Returns NULL on allocation failure. */
+char* dirname_malloc(const char *path) {
+        char *copy, *parent, *result;
+
+        assert(path);
+
+        /* dirname() may modify its argument, hence operate on a private copy. */
+        copy = strdup(path);
+        if (!copy)
+                return NULL;
+
+        parent = dirname(copy);
+        assert(parent);
+
+        if (parent != copy) {
+                /* The result lives elsewhere (possibly in static storage): duplicate it
+                 * before releasing our copy. */
+                result = strdup(parent);
+                free(copy);
+                return result;
+        }
+
+        return copy;
+}
+
+/* Returns a pointer into path at the start of its last component. Returns NULL if path is
+ * NULL. No memory is allocated. */
+const char *last_path_component(const char *path) {
+
+        /* Finds the last component of the path, preserving the optional trailing slash that signifies a directory.
+         *
+         *    a/b/c → c
+         *    a/b/c/ → c/
+         *    x → x
+         *    x/ → x/
+         *    /y → y
+         *    /y/ → y/
+         *    / → /
+         *    // → /
+         *    /foo/a → a
+         *    /foo/a/ → a/
+         *
+         *    Also, the empty string is mapped to itself.
+         *
+         * This is different than basename(), which returns "" when a trailing slash is present.
+         */
+
+        /* Use size_t, so that the result of strlen() is never truncated. */
+        size_t l, k;
+
+        if (!path)
+                return NULL;
+
+        l = k = strlen(path);
+        if (l == 0) /* special case — an empty string */
+                return path;
+
+        /* Skip the trailing slashes... */
+        while (k > 0 && path[k-1] == '/')
+                k--;
+
+        if (k == 0) /* the root directory */
+                return path + l - 1;
+
+        /* ... then walk back to the beginning of the component in front of them. */
+        while (k > 0 && path[k-1] != '/')
+                k--;
+
+        return path + k;
+}
+
+/* Stores a newly allocated copy of the last component of p, without trailing slashes, in *ret.
+ * Returns 0 on success, -EINVAL if p is NULL, has no non-slash character in its last component
+ * or the component does not pass filename_is_valid(), and -ENOMEM on allocation failure. */
+int path_extract_filename(const char *p, char **ret) {
+ _cleanup_free_ char *a = NULL;
+ const char *c, *e = NULL, *q;
+
+ /* Extracts the filename part (i.e. right-most component) from a path, i.e. string that passes
+ * filename_is_valid(). A wrapper around last_path_component(), but eats up trailing slashes. */
+
+ if (!p)
+ return -EINVAL;
+
+ c = last_path_component(p);
+
+ /* Let e point just behind the last character of the component that is not a slash. */
+ for (q = c; *q != 0; q++)
+ if (*q != '/')
+ e = q + 1;
+
+ if (!e) /* no valid character? */
+ return -EINVAL;
+
+ a = strndup(c, e - c);
+ if (!a)
+ return -ENOMEM;
+
+ if (!filename_is_valid(a))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(a);
+
+ return 0;
+}
+
+/* Returns true if p is usable as a single file name, i.e. as one path component: it must be
+ * non-empty, must not be "." or "..", must not contain a slash and must not be longer than
+ * NAME_MAX bytes. */
+bool filename_is_valid(const char *p) {
+        const char *e;
+
+        if (isempty(p))
+                return false;
+
+        if (dot_or_dot_dot(p))
+                return false;
+
+        e = strchrnul(p, '/');
+        if (*e != 0)
+                return false;
+
+        /* The limit for a single component is NAME_MAX. FILENAME_MAX is the stdio buffer size
+         * for whole path names and is far larger than what a directory entry may hold. */
+        if (e - p > NAME_MAX) /* NAME_MAX is counted *without* the trailing NUL byte */
+                return false;
+
+        return true;
+}
+
+/* Returns true if p is non-empty and, including the trailing NUL byte, fits into PATH_MAX
+ * bytes. */
+bool path_is_valid(const char *p) {
+        /* PATH_MAX is counted *with* the trailing NUL byte, hence the strict comparison. */
+        return !isempty(p) && strlen(p) < PATH_MAX;
+}
+
+/* Returns true if p passes path_is_valid() and contains neither "." nor ".." components nor
+ * duplicate slashes. */
+bool path_is_normalized(const char *p) {
+
+        if (!path_is_valid(p) || dot_or_dot_dot(p))
+                return false;
+
+        /* No ".." component at the beginning, at the end or in the middle. */
+        if (startswith(p, "../") || endswith(p, "/..") || strstr(p, "/../"))
+                return false;
+
+        /* Same for ".". */
+        if (startswith(p, "./") || endswith(p, "/.") || strstr(p, "/./"))
+                return false;
+
+        /* And finally, no duplicate slashes. */
+        return !strstr(p, "//");
+}
+
+/* Returns a newly allocated path referring to filename in the directory of path: everything
+ * up to and including the last slash of path, followed by filename. If filename is absolute,
+ * or path contains no slash at all, a plain copy of filename is returned. Returns NULL on
+ * allocation failure. */
+char *file_in_same_dir(const char *path, const char *filename) {
+        size_t dir_len, name_len;
+        const char *slash;
+        char *ret;
+
+        assert(path);
+        assert(filename);
+
+        if (path_is_absolute(filename))
+                return strdup(filename);
+
+        slash = strrchr(path, '/');
+        if (!slash)
+                return strdup(filename);
+
+        dir_len = slash + 1 - path; /* directory part, including the slash */
+        name_len = strlen(filename);
+
+        ret = new(char, dir_len + name_len + 1);
+        if (!ret)
+                return NULL;
+
+        memcpy(ret, path, dir_len);
+        memcpy(ret + dir_len, filename, name_len + 1); /* copies the trailing NUL, too */
+        return ret;
+}
+
+/* Returns true if filename looks like a hidden file, a backup file or a package manager
+ * leftover: it begins with a dot, is one of a few fixed names, ends in "~", or the text after
+ * its last dot is one of the suffixes listed at the end of this function. */
+bool hidden_or_backup_file(const char *filename) {
+ const char *p;
+
+ assert(filename);
+
+ if (filename[0] == '.' ||
+ streq(filename, "lost+found") ||
+ streq(filename, "aquota.user") ||
+ streq(filename, "aquota.group") ||
+ endswith(filename, "~"))
+ return true;
+
+ /* Only what follows the last dot is considered for the suffix check below. */
+ p = strrchr(filename, '.');
+ if (!p)
+ return false;
+
+ /* Please, let's not add more entries to the list below. If external projects think it's a good idea to come up
+ * with always new suffixes and that everybody else should just adjust to that, then it really should be on
+ * them. Hence, in future, let's not add any more entries. Instead, let's ask those packages to instead adopt
+ * one of the generic suffixes/prefixes for hidden files or backups, possibly augmented with an additional
+ * string. Specifically: there's now:
+ *
+ * The generic suffixes "~" and ".bak" for backup files
+ * The generic prefix "." for hidden files
+ *
+ * Thus, if a new package manager "foopkg" wants its own set of ".foopkg-new", ".foopkg-old", ".foopkg-dist"
+ * or so registered, let's refuse that and ask them to use ".foopkg.new", ".foopkg.old" or ".foopkg~" instead.
+ */
+
+ return STR_IN_SET(p + 1,
+ "rpmnew",
+ "rpmsave",
+ "rpmorig",
+ "dpkg-old",
+ "dpkg-new",
+ "dpkg-tmp",
+ "dpkg-dist",
+ "dpkg-bak",
+ "dpkg-backup",
+ "dpkg-remove",
+ "ucf-new",
+ "ucf-old",
+ "ucf-dist",
+ "swp",
+ "bak",
+ "old",
+ "new");
+}
+
+/* Returns true for paths that likely refer to a device, i.e. paths below /dev/ or /sys/. This
+ * is a purely textual check. */
+bool is_device_path(const char *path) {
+        const char *rest;
+
+        rest = PATH_STARTSWITH_SET(path, "/dev/", "/sys/");
+        return rest != NULL;
+}
+
+/* Superficial check whether path may name a device node, without looking at the node itself:
+ * it must be located below /dev/ or /run/systemd/inaccessible/, must not end in a slash and
+ * must pass path_is_normalized(). */
+bool valid_device_node_path(const char *path) {
+
+        /* Wrong location, or a trailing slash: cannot be a device node. */
+        if (!PATH_STARTSWITH_SET(path, "/dev/", "/run/systemd/inaccessible/") ||
+            endswith(path, "/"))
+                return false;
+
+        return path_is_normalized(path);
+}
+
+/* Like valid_device_node_path(), but additionally accepts the full-subsystem expressions
+ * beginning with "block-" or "char-", as DeviceAllow= and DeviceDeny= take them. */
+bool valid_device_allow_pattern(const char *path) {
+        assert(path);
+
+        return STARTSWITH_SET(path, "block-", "char-") || valid_device_node_path(path);
+}
+
+/* Guesses whether the systemd installation below root is at least of version minimal_version,
+ * by globbing for libsystemd-shared-*.so in a few library directories and parsing the version
+ * out of the file names found. Returns true (1) if a sufficiently new library was found, false
+ * (0) if not, and a negative errno on allocation or glob failures other than -ENOENT. */
+int systemd_installation_has_version(const char *root, unsigned minimal_version) {
+ const char *pattern;
+ int r;
+
+ /* Try to guess if systemd installation is later than the specified version. This
+ * is hacky and likely to yield false negatives, particularly if the installation
+ * is non-standard. False positives should be relatively rare.
+ */
+
+ NULSTR_FOREACH(pattern,
+ /* /lib works for systems without usr-merge, and for systems with a sane
+ * usr-merge, where /lib is a symlink to /usr/lib. /usr/lib is necessary
+ * for Gentoo which does a merge without making /lib a symlink.
+ */
+ "lib/systemd/libsystemd-shared-*.so\0"
+ "lib64/systemd/libsystemd-shared-*.so\0"
+ "usr/lib/systemd/libsystemd-shared-*.so\0"
+ "usr/lib64/systemd/libsystemd-shared-*.so\0") {
+
+ _cleanup_strv_free_ char **names = NULL;
+ _cleanup_free_ char *path = NULL;
+ char *c, **name;
+
+ path = prefix_root(root, pattern);
+ if (!path)
+ return -ENOMEM;
+
+ r = glob_extend(&names, path);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+
+ /* After this, "path" is the fixed part in front of the version number. */
+ assert_se(c = endswith(path, "*.so"));
+ *c = '\0'; /* truncate the glob part */
+
+ STRV_FOREACH(name, names) {
+ /* This is most likely to run only once, hence let's not optimize anything. */
+ char *t, *t2;
+ unsigned version;
+
+ t = startswith(*name, path);
+ if (!t)
+ continue;
+
+ t2 = endswith(t, ".so");
+ if (!t2)
+ continue;
+
+ /* This cuts the suffix off *name itself, which is why the log messages below append ".so" again. */
+ t2[0] = '\0'; /* truncate the suffix */
+
+ r = safe_atou(t, &version);
+ if (r < 0) {
+ log_debug_errno(r, "Found libsystemd shared at \"%s.so\", but failed to parse version: %m", *name);
+ continue;
+ }
+
+ log_debug("Found libsystemd shared at \"%s.so\", version %u (%s).",
+ *name, version,
+ version >= minimal_version ? "OK" : "too old");
+ if (version >= minimal_version)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Returns true if path is exactly "." or "..". NULL yields false. */
+bool dot_or_dot_dot(const char *path) {
+        if (!path || path[0] != '.')
+                return false;
+
+        if (path[1] == 0) /* "." */
+                return true;
+
+        return path[1] == '.' && path[2] == 0; /* ".." */
+}
+
+/* For operations relative to some root directory: returns true if the specified root is
+ * redundant, i.e. if it is NULL, the empty string, or consists of slashes only. */
+bool empty_or_root(const char *root) {
+        const char *p;
+
+        if (!root)
+                return true;
+
+        /* Skip all leading slashes; if nothing is left the string was empty or all slashes. */
+        for (p = root; *p == '/'; p++)
+                ;
+
+        return *p == 0;
+}
+
+/* Validates and simplifies, in place, a path read from a configuration file, logging a syntax
+ * error that refers to unit/filename/line/lvalue on failure.
+ *
+ * flag is a combination of PATH_CHECK_* values: PATH_CHECK_ABSOLUTE and PATH_CHECK_RELATIVE
+ * (mutually exclusive) demand an absolute or a relative path; PATH_CHECK_FATAL only affects
+ * the wording of the message (without it ", ignoring" is appended).
+ *
+ * Returns 0 on success and -EINVAL if the path is not valid UTF-8, violates the requested
+ * absolute/relative constraint, has an invalid length, or is not normalized after
+ * simplification. */
+int path_simplify_and_warn(
+                char *path,
+                unsigned flag,
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *lvalue) {
+
+        bool absolute, fatal = flag & PATH_CHECK_FATAL;
+
+        assert(!FLAGS_SET(flag, PATH_CHECK_ABSOLUTE | PATH_CHECK_RELATIVE));
+
+        if (!utf8_is_valid(path)) {
+                log_syntax_invalid_utf8(unit, LOG_ERR, filename, line, path);
+                return -EINVAL;
+        }
+
+        if (flag & (PATH_CHECK_ABSOLUTE | PATH_CHECK_RELATIVE)) {
+                absolute = path_is_absolute(path);
+
+                if (!absolute && (flag & PATH_CHECK_ABSOLUTE)) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0,
+                                   "%s= path is not absolute%s: %s",
+                                   lvalue, fatal ? "" : ", ignoring", path);
+                        return -EINVAL;
+                }
+
+                if (absolute && (flag & PATH_CHECK_RELATIVE)) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0,
+                                   "%s= path is absolute%s: %s",
+                                   lvalue, fatal ? "" : ", ignoring", path);
+                        return -EINVAL;
+                }
+        }
+
+        path_simplify(path, true);
+
+        /* Check the length before the normalization: path_is_normalized() itself rejects paths
+         * of invalid length, hence in the opposite order this more specific message could
+         * never be shown. */
+        if (!path_is_valid(path)) {
+                log_syntax(unit, LOG_ERR, filename, line, 0,
+                           "%s= path has invalid length (%zu bytes)%s.",
+                           lvalue, strlen(path), fatal ? "" : ", ignoring");
+                return -EINVAL;
+        }
+
+        if (!path_is_normalized(path)) {
+                log_syntax(unit, LOG_ERR, filename, line, 0,
+                           "%s= path is not normalized%s: %s",
+                           lvalue, fatal ? "" : ", ignoring", path);
+                return -EINVAL;
+        }
+
+        return 0;
+}
diff --git a/src/basic/path-util.h b/src/basic/path-util.h
new file mode 100644
index 0000000..86c5a57
--- /dev/null
+++ b/src/basic/path-util.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <alloca.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+/* Helpers for building search path strings at compile time. x is a string literal naming a
+ * directory prefix that ends in a slash. The plain variants produce a colon-separated list,
+ * the _NULSTR variants a list of NUL-terminated items. */
+
+/* Both <x>sbin and <x>bin. */
+#define PATH_SPLIT_SBIN_BIN(x) x "sbin:" x "bin"
+#define PATH_SPLIT_SBIN_BIN_NULSTR(x) x "sbin\0" x "bin\0"
+
+/* Only <x>bin. */
+#define PATH_NORMAL_SBIN_BIN(x) x "bin"
+#define PATH_NORMAL_SBIN_BIN_NULSTR(x) x "bin\0"
+
+/* Pick one of the two flavours above, depending on the build-time HAVE_SPLIT_BIN setting. */
+#if HAVE_SPLIT_BIN
+# define PATH_SBIN_BIN(x) PATH_SPLIT_SBIN_BIN(x)
+# define PATH_SBIN_BIN_NULSTR(x) PATH_SPLIT_SBIN_BIN_NULSTR(x)
+#else
+# define PATH_SBIN_BIN(x) PATH_NORMAL_SBIN_BIN(x)
+# define PATH_SBIN_BIN_NULSTR(x) PATH_NORMAL_SBIN_BIN_NULSTR(x)
+#endif
+
+/* The "normal" path covers /usr/local/ and /usr/, the "split usr" path additionally covers
+ * the directories directly below /. The "compat" path always lists both sbin and bin for all
+ * three prefixes, regardless of HAVE_SPLIT_BIN. */
+#define DEFAULT_PATH_NORMAL PATH_SBIN_BIN("/usr/local/") ":" PATH_SBIN_BIN("/usr/")
+#define DEFAULT_PATH_NORMAL_NULSTR PATH_SBIN_BIN_NULSTR("/usr/local/") PATH_SBIN_BIN_NULSTR("/usr/")
+#define DEFAULT_PATH_SPLIT_USR DEFAULT_PATH_NORMAL ":" PATH_SBIN_BIN("/")
+#define DEFAULT_PATH_SPLIT_USR_NULSTR DEFAULT_PATH_NORMAL_NULSTR PATH_SBIN_BIN_NULSTR("/")
+#define DEFAULT_PATH_COMPAT PATH_SPLIT_SBIN_BIN("/usr/local/") ":" PATH_SPLIT_SBIN_BIN("/usr/") ":" PATH_SPLIT_SBIN_BIN("/")
+
+/* DEFAULT_PATH is the search path to use when nothing else is configured; which flavour it is
+ * depends on the build-time HAVE_SPLIT_USR setting. */
+#if HAVE_SPLIT_USR
+# define DEFAULT_PATH DEFAULT_PATH_SPLIT_USR
+# define DEFAULT_PATH_NULSTR DEFAULT_PATH_SPLIT_USR_NULSTR
+#else
+# define DEFAULT_PATH DEFAULT_PATH_NORMAL
+# define DEFAULT_PATH_NULSTR DEFAULT_PATH_NORMAL_NULSTR
+#endif
+
+bool is_path(const char *p) _pure_;
+int path_split_and_make_absolute(const char *p, char ***ret);
+bool path_is_absolute(const char *p) _pure_;
+char* path_make_absolute(const char *p, const char *prefix);
+int safe_getcwd(char **ret);
+int path_make_absolute_cwd(const char *p, char **ret);
+int path_make_relative(const char *from_dir, const char *to_path, char **_r);
+char* path_startswith(const char *path, const char *prefix) _pure_;
+int path_compare(const char *a, const char *b) _pure_;
+bool path_equal(const char *a, const char *b) _pure_;
+bool path_equal_or_files_same(const char *a, const char *b, int flags);
+char* path_join_internal(const char *first, ...);
+#define path_join(x, ...) path_join_internal(x, __VA_ARGS__, (const char*) -1)
+
+char* path_simplify(char *path, bool kill_dots);
+
+/* NULL-tolerant variant of path_equal(): two NULL pointers are considered equal, a NULL and a
+ * non-NULL pointer are not, and two non-NULL pointers are compared with path_equal(). */
+static inline bool path_equal_ptr(const char *a, const char *b) {
+        if (!a || !b)
+                return !a && !b;
+
+        return path_equal(a, b);
+}
+
+/* Evaluates to true if path p is equal, according to path_equal(), to any of the listed paths.
+ * p is evaluated exactly once, so that expressions with side effects may be passed safely.
+ * Note: the search terminates on the first NULL item. */
+#define PATH_IN_SET(p, ...)                                     \
+        ({                                                      \
+                const char *_needle = (p);                      \
+                char **_s;                                      \
+                bool _found = false;                            \
+                STRV_FOREACH(_s, STRV_MAKE(__VA_ARGS__))        \
+                        if (path_equal(_needle, *_s)) {         \
+                               _found = true;                   \
+                               break;                           \
+                        }                                       \
+                _found;                                         \
+        })
+
+/* Tries each of the listed prefixes in order with path_startswith() and evaluates to the result
+ * of the first one that matches, i.e. a pointer into p after the matched prefix, or to NULL if
+ * none matches. p is evaluated only once. */
+#define PATH_STARTSWITH_SET(p, ...) \
+ ({ \
+ const char *_p = (p); \
+ char *_found = NULL, **_i; \
+ STRV_FOREACH(_i, STRV_MAKE(__VA_ARGS__)) { \
+ _found = path_startswith(_p, *_i); \
+ if (_found) \
+ break; \
+ } \
+ _found; \
+ })
+
+int path_strv_make_absolute_cwd(char **l);
+char** path_strv_resolve(char **l, const char *root);
+char** path_strv_resolve_uniq(char **l, const char *root);
+
+int find_binary(const char *name, char **filename);
+
+bool paths_check_timestamp(const char* const* paths, usec_t *paths_ts_usec, bool update);
+
+int fsck_exists(const char *fstype);
+int mkfs_exists(const char *fstype);
+
+/* Iterates through the path prefixes of the specified path, going up
+ * the tree, to root. Also returns "" (and not "/"!) for the root
+ * directory. Excludes the specified directory itself.
+ *
+ * "prefix" must be a writable char buffer large enough to hold a copy of "path": the path is
+ * copied into it with strcpy(), simplified, and then truncated at its last slash once per
+ * iteration. */
+#define PATH_FOREACH_PREFIX(prefix, path) \
+ for (char *_slash = ({ \
+ path_simplify(strcpy(prefix, path), false); \
+ streq(prefix, "/") ? NULL : strrchr(prefix, '/'); \
+ }); \
+ _slash && ((*_slash = 0), true); \
+ _slash = strrchr((prefix), '/'))
+
+/* Same as PATH_FOREACH_PREFIX but also includes the specified path itself.
+ *
+ * This works by letting the first iteration "truncate" the copy at its own trailing NUL byte,
+ * which is what strrchr(prefix, 0) returns. If the path is the root directory the only
+ * iteration sees "". */
+#define PATH_FOREACH_PREFIX_MORE(prefix, path) \
+ for (char *_slash = ({ \
+ path_simplify(strcpy(prefix, path), false); \
+ if (streq(prefix, "/")) \
+ prefix[0] = 0; \
+ strrchr(prefix, 0); \
+ }); \
+ _slash && ((*_slash = 0), true); \
+ _slash = strrchr((prefix), '/'))
+
+char *prefix_root(const char *root, const char *path);
+
+/* Similar to prefix_root(), but returns an alloca() buffer, or
+ * possibly a const pointer into the path parameter.
+ *
+ * The latter happens if root is NULL, empty or only slashes. Since the buffer comes from
+ * newa(), the result must not be freed and must not be used after the calling function has
+ * returned. Both arguments are evaluated exactly once. */
+#define prefix_roota(root, path) \
+ ({ \
+ const char* _path = (path), *_root = (root), *_ret; \
+ char *_p, *_n; \
+ size_t _l; \
+ while (_path[0] == '/' && _path[1] == '/') \
+ _path ++; \
+ if (empty_or_root(_root)) \
+ _ret = _path; \
+ else { \
+ _l = strlen(_root) + 1 + strlen(_path) + 1; \
+ _n = newa(char, _l); \
+ _p = stpcpy(_n, _root); \
+ while (_p > _n && _p[-1] == '/') \
+ _p--; \
+ if (_path[0] != '/') \
+ *(_p++) = '/'; \
+ strcpy(_p, _path); \
+ _ret = _n; \
+ } \
+ _ret; \
+ })
+
+int parse_path_argument_and_warn(const char *path, bool suppress_root, char **arg);
+
+char* dirname_malloc(const char *path);
+const char *last_path_component(const char *path);
+int path_extract_filename(const char *p, char **ret);
+
+bool filename_is_valid(const char *p) _pure_;
+bool path_is_valid(const char *p) _pure_;
+bool path_is_normalized(const char *p) _pure_;
+
+char *file_in_same_dir(const char *path, const char *filename);
+
+bool hidden_or_backup_file(const char *filename) _pure_;
+
+bool is_device_path(const char *path);
+
+bool valid_device_node_path(const char *path);
+bool valid_device_allow_pattern(const char *path);
+
+int systemd_installation_has_version(const char *root, unsigned minimal_version);
+
+bool dot_or_dot_dot(const char *path);
+
+/* Returns p with a leading "/dev/" prefix (as matched by path_startswith()) removed, or p
+ * itself if it has no such prefix. The result always points into p. */
+static inline const char *skip_dev_prefix(const char *p) {
+        const char *rest = path_startswith(p, "/dev/");
+
+        if (rest)
+                return rest;
+
+        return p;
+}
+
+bool empty_or_root(const char *root);
+/* Maps a NULL or empty path to "/" and returns any other path unchanged. */
+static inline const char *empty_to_root(const char *path) {
+        if (isempty(path))
+                return "/";
+
+        return path;
+}
+
+/* Flags for path_simplify_and_warn(). PATH_CHECK_ABSOLUTE and PATH_CHECK_RELATIVE are mutually
+ * exclusive; path_simplify_and_warn() asserts that they are not both set. */
+enum {
+ PATH_CHECK_FATAL = 1 << 0, /* If not set, then error message is appended with 'ignoring'. */
+ PATH_CHECK_ABSOLUTE = 1 << 1, /* Refuse paths that are not absolute. */
+ PATH_CHECK_RELATIVE = 1 << 2, /* Refuse paths that are absolute. */
+};
+
+int path_simplify_and_warn(char *path, unsigned flag, const char *unit, const char *filename, unsigned line, const char *lvalue);
diff --git a/src/basic/prioq.c b/src/basic/prioq.c
new file mode 100644
index 0000000..76b27fa
--- /dev/null
+++ b/src/basic/prioq.c
@@ -0,0 +1,300 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+/*
+ * Priority Queue
+ * The prioq object implements a priority queue. That is, it orders objects by
+ * their priority and allows O(1) access to the object with the highest
+ * priority. Insertion and removal are Θ(log n). Optionally, the caller can
+ * provide a pointer to an index which will be kept up-to-date by the prioq.
+ *
+ * The underlying algorithm used in this implementation is a Heap.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "hashmap.h"
+#include "prioq.h"
+
+struct prioq_item { /* One heap slot. */
+ void *data; /* caller payload; handed to compare_func and matched by pointer identity in find_item() */
+ unsigned *idx; /* optional (may be NULL) caller-provided location that swap()/prioq_put() keep equal to this slot's array position */
+};
+
+struct Prioq { /* Array-backed binary heap; the item that compares lowest is kept at items[0]. */
+ compare_func_t compare_func; /* ordering callback applied to the data pointers of two items */
+ unsigned n_items, n_allocated; /* slots in use / slots allocated in items[] */
+
+ struct prioq_item *items; /* parent of slot i is (i-1)/2, its children are 2i+1 and 2i+2 */
+};
+
+Prioq *prioq_new(compare_func_t compare_func) {
+        Prioq *queue = new(Prioq, 1);
+
+        /* On allocation failure NULL is passed straight back to the caller. Otherwise every member
+         * except the comparison callback starts out zeroed: no items, nothing allocated. */
+        if (queue)
+                *queue = (Prioq) {
+                        .compare_func = compare_func,
+                };
+
+        return queue;
+}
+
+Prioq* prioq_free(Prioq *q) {
+        /* Releases the slot array and the queue object itself. The payload pointers stored in the
+         * slots are not touched. Always returns NULL, and accepts a NULL queue. */
+        if (q) {
+                free(q->items);
+                free(q);
+        }
+
+        return NULL;
+}
+
+int prioq_ensure_allocated(Prioq **q, compare_func_t compare_func) {
+        assert(q);
+
+        /* Create the queue on first use only. An already existing queue is left alone, and its
+         * comparison function is not checked against the one passed in. Returns 0 or -ENOMEM. */
+        if (!*q) {
+                Prioq *fresh = prioq_new(compare_func);
+
+                if (!fresh)
+                        return -ENOMEM;
+
+                *q = fresh;
+        }
+
+        return 0;
+}
+
+static void swap(Prioq *q, unsigned j, unsigned k) {
+        struct prioq_item held;
+
+        assert(q);
+        assert(j < q->n_items);
+        assert(k < q->n_items);
+
+        /* Before the exchange, every index back-pointer must agree with the slot it belongs to. */
+        assert(!q->items[j].idx || *(q->items[j].idx) == j);
+        assert(!q->items[k].idx || *(q->items[k].idx) == k);
+
+        /* Exchange the two slots wholesale (payload and back-pointer travel together) ... */
+        held = q->items[j];
+        q->items[j] = q->items[k];
+        q->items[k] = held;
+
+        /* ... and tell the owners of the back-pointers where their item lives now. */
+        if (q->items[j].idx)
+                *q->items[j].idx = j;
+
+        if (q->items[k].idx)
+                *q->items[k].idx = k;
+}
+
+static unsigned shuffle_up(Prioq *q, unsigned idx) {
+        assert(q);
+        assert(idx < q->n_items);
+
+        /* Let the item climb towards the root for as long as its parent compares strictly greater.
+         * Returns the slot the item ends up in. */
+        for (unsigned parent; idx > 0; idx = parent) {
+                parent = (idx-1)/2;
+
+                if (q->compare_func(q->items[parent].data, q->items[idx].data) <= 0)
+                        break;
+
+                swap(q, idx, parent);
+        }
+
+        return idx;
+}
+
+static unsigned shuffle_down(Prioq *q, unsigned idx) {
+        assert(q);
+
+        /* Let the item sink: at each level pick the smallest of the item and its (up to two)
+         * children, and stop as soon as the item itself is the smallest. Returns the final slot. */
+        for (;;) {
+                unsigned right = (idx+1)*2;
+                unsigned left = right-1;
+                unsigned smallest = idx;
+
+                /* No left child means no children at all */
+                if (left >= q->n_items)
+                        break;
+
+                if (q->compare_func(q->items[left].data, q->items[idx].data) < 0)
+                        smallest = left;
+
+                /* The right child is compared against the current winner, not against idx */
+                if (right < q->n_items &&
+                    q->compare_func(q->items[right].data, q->items[smallest].data) < 0)
+                        smallest = right;
+
+                if (smallest == idx)
+                        break;
+
+                swap(q, idx, smallest);
+                idx = smallest;
+        }
+
+        return idx;
+}
+
+int prioq_put(Prioq *q, void *data, unsigned *idx) {
+        unsigned slot;
+
+        assert(q);
+
+        /* Inserts data into the queue. If idx is non-NULL it is remembered and kept up-to-date with
+         * the item's position. Returns 0 on success and -ENOMEM if the array cannot be grown. */
+
+        if (q->n_items >= q->n_allocated) {
+                /* Out of room: grow to twice the needed size, but to no fewer than 16 slots */
+                unsigned want = MAX((q->n_items+1) * 2, 16u);
+                struct prioq_item *grown;
+
+                grown = reallocarray(q->items, want, sizeof(struct prioq_item));
+                if (!grown)
+                        return -ENOMEM;
+
+                q->items = grown;
+                q->n_allocated = want;
+        }
+
+        /* Append at the end of the array, then restore the heap order */
+        slot = q->n_items;
+        q->items[slot] = (struct prioq_item) {
+                .data = data,
+                .idx = idx,
+        };
+        q->n_items++;
+
+        if (idx)
+                *idx = slot;
+
+        shuffle_up(q, slot);
+
+        return 0;
+}
+
+static void remove_item(Prioq *q, struct prioq_item *i) {
+        struct prioq_item *tail;
+        unsigned pos;
+
+        assert(q);
+        assert(i);
+
+        tail = q->items + q->n_items - 1;
+
+        if (i == tail) {
+                /* Removing the very last slot needs no reordering */
+                q->n_items--;
+                return;
+        }
+
+        /* Otherwise move the last slot's contents into the hole, shrink the array, and let the moved
+         * item find its place: first downwards, then upwards from wherever it landed. */
+        pos = i - q->items;
+
+        *i = *tail;
+        if (i->idx)
+                *i->idx = pos;
+        q->n_items--;
+
+        pos = shuffle_down(q, pos);
+        shuffle_up(q, pos);
+}
+
+_pure_ static struct prioq_item* find_item(Prioq *q, void *data, unsigned *idx) {
+        struct prioq_item *i;
+
+        assert(q);
+
+        /* Locates the slot holding data. With an index hint the lookup is a single bounds-checked
+         * probe that must hit exactly that payload; without one the array is scanned front to back. */
+
+        if (q->n_items <= 0)
+                return NULL;
+
+        if (!idx) {
+                for (i = q->items; i < q->items + q->n_items; i++)
+                        if (i->data == data)
+                                return i;
+
+                return NULL;
+        }
+
+        if (*idx == PRIOQ_IDX_NULL ||
+            *idx >= q->n_items)
+                return NULL;
+
+        i = q->items + *idx;
+
+        return i->data == data ? i : NULL;
+}
+
+int prioq_remove(Prioq *q, void *data, unsigned *idx) {
+        struct prioq_item *hit;
+
+        /* Returns 1 if the item was found and removed, 0 if there is no queue or no such item. */
+
+        if (!q)
+                return 0;
+
+        hit = find_item(q, data, idx);
+        if (hit) {
+                remove_item(q, hit);
+                return 1;
+        }
+
+        return 0;
+}
+
+int prioq_reshuffle(Prioq *q, void *data, unsigned *idx) {
+        struct prioq_item *hit;
+        unsigned pos;
+
+        assert(q);
+
+        /* Re-establishes the heap order around one item, trying downwards first and then upwards.
+         * Returns 1 if the item was found, 0 otherwise. */
+
+        hit = find_item(q, data, idx);
+        if (!hit)
+                return 0;
+
+        pos = shuffle_down(q, hit - q->items);
+        shuffle_up(q, pos);
+
+        return 1;
+}
+
+void *prioq_peek_by_index(Prioq *q, unsigned idx) {
+        /* Payload stored in array slot idx, or NULL for a missing queue or an out-of-range index.
+         * Nothing is removed. */
+        if (!q || idx >= q->n_items)
+                return NULL;
+
+        return q->items[idx].data;
+}
+
+void *prioq_pop(Prioq *q) {
+        void *head;
+
+        /* Removes the item at the root of the heap and returns its payload; NULL if there is no
+         * queue or it holds no items. */
+        if (!q || q->n_items <= 0)
+                return NULL;
+
+        head = q->items[0].data;
+        remove_item(q, q->items);
+
+        return head;
+}
+
+unsigned prioq_size(Prioq *q) {
+        /* Number of items currently queued; a NULL queue counts as holding none. */
+        return q ? q->n_items : 0;
+}
+
+bool prioq_isempty(Prioq *q) {
+        /* A NULL queue is reported as empty, just like one without items. */
+        return !q || q->n_items <= 0;
+}
diff --git a/src/basic/prioq.h b/src/basic/prioq.h
new file mode 100644
index 0000000..1fb57bf
--- /dev/null
+++ b/src/basic/prioq.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+
+typedef struct Prioq Prioq; /* opaque to users of this header; the layout is private to prioq.c */
+
+#define PRIOQ_IDX_NULL ((unsigned) -1) /* index value that the lookup in prioq.c treats as "not in any queue" */
+
+Prioq *prioq_new(compare_func_t compare); /* returns NULL if the allocation fails */
+Prioq *prioq_free(Prioq *q); /* accepts NULL; always returns NULL; stored payloads are not freed */
+DEFINE_TRIVIAL_CLEANUP_FUNC(Prioq*, prioq_free);
+int prioq_ensure_allocated(Prioq **q, compare_func_t compare_func); /* allocates *q only if it is NULL; 0 or -ENOMEM */
+
+int prioq_put(Prioq *q, void *data, unsigned *idx); /* 0 or -ENOMEM; a non-NULL idx is kept in sync with the item's position */
+int prioq_remove(Prioq *q, void *data, unsigned *idx); /* 1 if found and removed, 0 otherwise (including q == NULL) */
+int prioq_reshuffle(Prioq *q, void *data, unsigned *idx); /* 1 if found and re-sorted, 0 if not found */
+
+void *prioq_peek_by_index(Prioq *q, unsigned idx) _pure_; /* payload in array slot idx, or NULL if out of range or q == NULL */
+static inline void *prioq_peek(Prioq *q) { /* payload at the root of the heap (slot 0), without removing it */
+ return prioq_peek_by_index(q, 0);
+}
+void *prioq_pop(Prioq *q); /* removes the root item and returns its payload; NULL if there is none */
+
+/* Iterates over all payloads in array (not priority) order, assigning each to p in turn. The loop ends
+ * at the first slot for which prioq_peek_by_index() returns NULL, i.e. at the end of the queue or at a
+ * NULL payload. Both macro arguments are parenthesized so that arbitrary expressions can be passed. */
+#define PRIOQ_FOREACH_ITEM(q, p) \
+        for (unsigned _i = 0; ((p) = prioq_peek_by_index((q), _i)); _i++)
+
+unsigned prioq_size(Prioq *q) _pure_; /* number of queued items; 0 for a NULL queue */
+bool prioq_isempty(Prioq *q) _pure_; /* true for a NULL queue or one without items */
diff --git a/src/basic/proc-cmdline.c b/src/basic/proc-cmdline.c
new file mode 100644
index 0000000..1670001
--- /dev/null
+++ b/src/basic/proc-cmdline.c
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "fileio.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "special.h"
+#include "string-util.h"
+#include "util.h"
+#include "virt.h"
+
+int proc_cmdline(char **ret) {
+        const char *override;
+        char *copy;
+
+        assert(ret);
+
+        /* Returns the kernel command line as a newly allocated string in *ret. For testing purposes
+         * $SYSTEMD_PROC_CMDLINE, if set, replaces whatever we would read otherwise. */
+        override = secure_getenv("SYSTEMD_PROC_CMDLINE");
+        if (!override) {
+                /* If a container is detected, the command line of PID 1 is used in place of
+                 * /proc/cmdline */
+                if (detect_container() > 0)
+                        return get_process_cmdline(1, 0, false, ret);
+
+                return read_one_line_file("/proc/cmdline", ret);
+        }
+
+        copy = strdup(override);
+        if (!copy)
+                return -ENOMEM;
+
+        *ret = copy;
+        return 0;
+}
+
+static int proc_cmdline_extract_first(const char **p, char **ret_word, ProcCmdlineFlags flags) {
+        const char *cursor = *p;
+
+        /* Pulls the next relevant word off the command line at *p, skipping words that do not apply
+         * to the current environment. Returns 1 with the word in *ret_word, 0 with *ret_word set to
+         * NULL once the line is exhausted, or a negative errno (in which case *p is left alone). */
+
+        for (;;) {
+                _cleanup_free_ char *word = NULL;
+                const char *stripped;
+                int r;
+
+                r = extract_first_word(&cursor, &word, NULL, EXTRACT_QUOTES|EXTRACT_RELAX);
+                if (r < 0)
+                        return r;
+                if (r == 0) {
+                        *p = cursor;
+                        *ret_word = NULL;
+                        return 0;
+                }
+
+                stripped = startswith(word, "rd.");
+                if (stripped) {
+                        /* "rd." arguments are meant for the initrd only */
+                        if (!in_initrd())
+                                continue;
+
+                        if (FLAGS_SET(flags, PROC_CMDLINE_STRIP_RD_PREFIX)) {
+                                r = free_and_strdup(&word, stripped);
+                                if (r < 0)
+                                        return r;
+                        }
+
+                } else if (FLAGS_SET(flags, PROC_CMDLINE_RD_STRICT) && in_initrd())
+                        /* In strict mode the initrd ignores arguments meant for the host */
+                        continue;
+
+                *p = cursor;
+                *ret_word = TAKE_PTR(word);
+                return 1;
+        }
+}
+
+int proc_cmdline_parse_given(const char *line, proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags) {
+        const char *cursor = line;
+
+        assert(parse_item);
+
+        /* PROC_CMDLINE_VALUE_OPTIONAL has no meaning when iterating over all words, hence refuse it. */
+        assert(!FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL));
+
+        /* Invokes parse_item() once per relevant word of line. Each word is split in place at its
+         * first "="; words without one are passed with a NULL value. The first negative return
+         * value, whether from word extraction or from the callback, aborts the iteration. */
+        for (;;) {
+                _cleanup_free_ char *word = NULL;
+                char *value = NULL, *eq;
+                int r;
+
+                r = proc_cmdline_extract_first(&cursor, &word, flags);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        return 0;
+
+                eq = strchr(word, '=');
+                if (eq) {
+                        *eq = 0;
+                        value = eq + 1;
+                }
+
+                r = parse_item(word, value, data);
+                if (r < 0)
+                        return r;
+        }
+}
+
+int proc_cmdline_parse(proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags) {
+        _cleanup_free_ char *cmdline = NULL;
+        int r;
+
+        assert(parse_item);
+
+        /* Same as proc_cmdline_parse_given(), but operating on the command line proc_cmdline()
+         * returns. */
+        r = proc_cmdline(&cmdline);
+        if (r >= 0)
+                r = proc_cmdline_parse_given(cmdline, parse_item, data, flags);
+
+        return r;
+}
+
+static bool relaxed_equal_char(char a, char b) {
+        /* Two characters match if they are identical, or if one is "-" and the other "_". */
+        if (a == b)
+                return true;
+
+        if (a == '_')
+                return b == '-';
+
+        if (a == '-')
+                return b == '_';
+
+        return false;
+}
+
+char *proc_cmdline_key_startswith(const char *s, const char *prefix) {
+        assert(s);
+        assert(prefix);
+
+        /* A startswith() variant in which "-" and "_" are interchangeable. Returns a pointer to the
+         * first character of s after the prefix, or NULL if s does not begin with it. */
+        while (*prefix != 0) {
+                if (!relaxed_equal_char(*s, *prefix))
+                        return NULL;
+
+                s++;
+                prefix++;
+        }
+
+        return (char*) s;
+}
+
+bool proc_cmdline_key_streq(const char *x, const char *y) {
+        assert(x);
+        assert(y);
+
+        /* A streq() variant in which "-" and "_" are interchangeable. Strings of different length
+         * never match, since a NUL byte only equals another NUL byte. */
+        for (;;) {
+                if (*x == 0 && *y == 0)
+                        return true;
+
+                if (!relaxed_equal_char(*x, *y))
+                        return false;
+
+                x++;
+                y++;
+        }
+}
+
+int proc_cmdline_get_key(const char *key, ProcCmdlineFlags flags, char **ret_value) {
+        _cleanup_free_ char *line = NULL, *ret = NULL;
+        bool found = false;
+        const char *p;
+        int r;
+
+        /* Looks for a specific key on the kernel command line. Supports three modes:
+         *
+         * a) The "ret_value" parameter is used. In this case a parameter beginning with the "key" string followed by
+         *    "=" is searched for, and the value following it is returned in "ret_value". If the key is specified
+         *    multiple times, the value of the last occurrence is returned.
+         *
+         * b) as above, but the PROC_CMDLINE_VALUE_OPTIONAL flag is set. In this case if the key is found as a separate
+         *    word (i.e. not followed by "=" but instead by whitespace or the end of the command line), then this is
+         *    also accepted, and "ret_value" is returned as NULL (unless an earlier occurrence carried a value).
+         *
+         * c) The "ret_value" parameter is NULL. In this case a search for the exact "key" parameter is performed.
+         *
+         * In all three modes "-" and "_" in the key are considered the same character.
+         *
+         * In all three cases, > 0 is returned if the key is found, 0 if not. A negative errno is returned on
+         * failure: -EINVAL for an empty key or for PROC_CMDLINE_VALUE_OPTIONAL without "ret_value". */
+
+        if (isempty(key))
+                return -EINVAL;
+
+        if (FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL) && !ret_value)
+                return -EINVAL;
+
+        r = proc_cmdline(&line);
+        if (r < 0)
+                return r;
+
+        p = line;
+        for (;;) {
+                _cleanup_free_ char *word = NULL;
+
+                r = proc_cmdline_extract_first(&p, &word, flags);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                if (ret_value) {
+                        const char *e;
+
+                        e = proc_cmdline_key_startswith(word, key);
+                        if (!e)
+                                continue;
+
+                        if (*e == '=') {
+                                /* Keep going after a match, so that later assignments override earlier ones */
+                                r = free_and_strdup(&ret, e+1);
+                                if (r < 0)
+                                        return r;
+
+                                found = true;
+
+                        } else if (*e == 0 && FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL))
+                                found = true;
+
+                } else {
+                        /* Use the same relaxed "-" vs. "_" comparison as the value modes above, so that all
+                         * three modes agree on what counts as the same key. */
+                        if (proc_cmdline_key_streq(word, key)) {
+                                found = true;
+                                break; /* we found what we were looking for */
+                        }
+                }
+        }
+
+        if (ret_value)
+                *ret_value = TAKE_PTR(ret);
+
+        return found;
+}
+
+int proc_cmdline_get_bool(const char *key, bool *ret) {
+        _cleanup_free_ char *value = NULL;
+        int r;
+
+        assert(ret);
+
+        /* Reads a boolean switch from the kernel command line. Returns 0 (and *ret = false) if the
+         * key is absent, 1 if it is present, and a negative errno on failure, which includes a value
+         * that parse_boolean() rejects. */
+
+        r = proc_cmdline_get_key(key, PROC_CMDLINE_VALUE_OPTIONAL, &value);
+        if (r < 0)
+                return r;
+        if (r == 0) {
+                *ret = false;
+                return 0;
+        }
+
+        if (!value) {
+                /* The key was given without "=...", which counts as true */
+                *ret = true;
+                return 1;
+        }
+
+        r = parse_boolean(value);
+        if (r < 0)
+                return r;
+
+        *ret = r;
+        return 1;
+}
+
+int proc_cmdline_get_key_many_internal(ProcCmdlineFlags flags, ...) { /* Looks up several keys in one pass
+ * over the kernel command line. The variable arguments are (const char *key, char **value) pairs,
+ * terminated by a NULL key. For every "key=value" word found, the value is duplicated into the matching
+ * *value, replacing (and freeing) what was there before. Returns the number of assignments made
+ * (repeated keys count each time), or a negative errno on failure. */
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+ va_list ap;
+ int r, ret = 0;
+
+ /* The PROC_CMDLINE_VALUE_OPTIONAL flag doesn't really make sense for proc_cmdline_get_key_many(), let's make
+ * this clear. */
+ assert(!FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL));
+
+ /* This call may clobber arguments on failure! */
+
+ r = proc_cmdline(&line);
+ if (r < 0)
+ return r;
+
+ p = line;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = proc_cmdline_extract_first(&p, &word, flags);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ va_start(ap, flags); /* restart the argument walk for every word, so each word is checked against all keys */
+
+ for (;;) {
+ char **v;
+ const char *k, *e;
+
+ k = va_arg(ap, const char*);
+ if (!k) /* NULL key marks the end of the pair list */
+ break;
+
+ assert_se(v = va_arg(ap, char**));
+
+ e = proc_cmdline_key_startswith(word, k);
+ if (e && *e == '=') { /* only "key=value" counts here; a bare "key" word is ignored */
+ r = free_and_strdup(v, e + 1);
+ if (r < 0) {
+ va_end(ap); /* every va_start() needs its va_end(), also on the error path */
+ return r;
+ }
+
+ ret++;
+ }
+ }
+
+ va_end(ap);
+ }
+
+ return ret;
+}
+
+int shall_restore_state(void) {
+        bool restore;
+        int r;
+
+        /* Evaluates "systemd.restore_state" on the kernel command line. If the switch is not
+         * specified the answer is true. Returns a negative errno if the lookup fails. */
+        r = proc_cmdline_get_bool("systemd.restore_state", &restore);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return true;
+
+        return restore;
+}
+
+static const char * const rlmap[] = { /* flat list of (command line word, target unit) pairs, terminated by NULL; used by runlevel_to_target() outside of the initrd */
+ "emergency", SPECIAL_EMERGENCY_TARGET,
+ "-b", SPECIAL_EMERGENCY_TARGET,
+ "rescue", SPECIAL_RESCUE_TARGET,
+ "single", SPECIAL_RESCUE_TARGET,
+ "-s", SPECIAL_RESCUE_TARGET,
+ "s", SPECIAL_RESCUE_TARGET,
+ "S", SPECIAL_RESCUE_TARGET,
+ "1", SPECIAL_RESCUE_TARGET,
+ "2", SPECIAL_MULTI_USER_TARGET,
+ "3", SPECIAL_MULTI_USER_TARGET,
+ "4", SPECIAL_MULTI_USER_TARGET,
+ "5", SPECIAL_GRAPHICAL_TARGET,
+ NULL
+};
+
+static const char * const rlmap_initrd[] = { /* same pair layout as rlmap; runlevel_to_target() uses this table in the initrd, after stripping the "rd." prefix from the word */
+ "emergency", SPECIAL_EMERGENCY_TARGET,
+ "rescue", SPECIAL_RESCUE_TARGET,
+ NULL
+};
+
+const char* runlevel_to_target(const char *word) {
+        const char * const *table;
+
+        /* Translates a runlevel-style command line word into the name of the target unit it stands
+         * for. Returns NULL if the word is NULL or not recognized. */
+
+        if (!word)
+                return NULL;
+
+        /* In the initrd only words carrying the "rd." prefix are considered, and the lookup is done
+         * on what follows the prefix. */
+        if (in_initrd()) {
+                word = startswith(word, "rd.");
+                if (!word)
+                        return NULL;
+        }
+
+        table = in_initrd() ? rlmap_initrd : rlmap;
+
+        /* The tables are flat lists of (word, target) pairs */
+        for (const char * const *entry = table; *entry; entry += 2)
+                if (streq(word, entry[0]))
+                        return entry[1];
+
+        return NULL;
+}
diff --git a/src/basic/proc-cmdline.h b/src/basic/proc-cmdline.h
new file mode 100644
index 0000000..ff04379
--- /dev/null
+++ b/src/basic/proc-cmdline.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "log.h"
+
+typedef enum ProcCmdlineFlags {
+ PROC_CMDLINE_STRIP_RD_PREFIX = 1 << 0, /* in the initrd, hand out "rd." words with that prefix removed */
+ PROC_CMDLINE_VALUE_OPTIONAL = 1 << 1, /* proc_cmdline_get_key(): also accept the key as a bare word without "=value" */
+ PROC_CMDLINE_RD_STRICT = 1 << 2, /* in the initrd, skip all words that lack the "rd." prefix */
+} ProcCmdlineFlags;
+
+typedef int (*proc_cmdline_parse_t)(const char *key, const char *value, void *data);
+
+int proc_cmdline(char **ret);
+
+int proc_cmdline_parse_given(const char *line, proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags);
+int proc_cmdline_parse(const proc_cmdline_parse_t parse, void *userdata, ProcCmdlineFlags flags);
+
+int proc_cmdline_get_key(const char *parameter, ProcCmdlineFlags flags, char **value);
+int proc_cmdline_get_bool(const char *key, bool *ret);
+
+int proc_cmdline_get_key_many_internal(ProcCmdlineFlags flags, ...);
+#define proc_cmdline_get_key_many(flags, ...) proc_cmdline_get_key_many_internal(flags, __VA_ARGS__, NULL)
+
+char *proc_cmdline_key_startswith(const char *s, const char *prefix);
+bool proc_cmdline_key_streq(const char *x, const char *y);
+
+int shall_restore_state(void);
+const char* runlevel_to_target(const char *rl);
+
+/* A little helper call, to be used in proc_cmdline_parse_t callbacks */
+static inline bool proc_cmdline_value_missing(const char *key, const char *value) {
+        /* Returns true, after logging a warning that names the switch, if value is NULL. */
+        if (value)
+                return false;
+
+        log_warning("Missing argument for %s= kernel command line switch, ignoring.", key);
+        return true;
+}
diff --git a/src/basic/process-util.c b/src/basic/process-util.c
new file mode 100644
index 0000000..78ce43b
--- /dev/null
+++ b/src/basic/process-util.c
@@ -0,0 +1,1565 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/oom.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/personality.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <syslog.h>
+#include <unistd.h>
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "ioprio.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "user-util.h"
+#include "util.h"
+
+int get_process_state(pid_t pid) {
+        _cleanup_free_ char *stat_line = NULL;
+        const char *path, *paren;
+        char state;
+        int r;
+
+        assert(pid >= 0);
+
+        /* Returns the process state character from /proc/<pid>/stat as a non-negative value, -ESRCH
+         * if the file does not exist, -EIO if the line cannot be parsed, or another negative errno
+         * from reading the file. */
+
+        path = procfs_file_alloca(pid, "stat");
+
+        r = read_one_line_file(path, &stat_line);
+        if (r < 0)
+                return r == -ENOENT ? -ESRCH : r;
+
+        /* The state is the first field after the comm field; search for the last ")" on the line
+         * to get past it */
+        paren = strrchr(stat_line, ')');
+        if (!paren)
+                return -EIO;
+
+        if (sscanf(paren + 1, " %c", &state) != 1)
+                return -EIO;
+
+        return (unsigned char) state;
+}
+
+int get_process_comm(pid_t pid, char **ret) {
+        _cleanup_free_ char *raw = NULL, *buffer = NULL;
+        const char *path;
+        int r;
+
+        assert(ret);
+        assert(pid >= 0);
+
+        /* Returns the contents of /proc/<pid>/comm in *ret, passed through cellescape() into a
+         * buffer of TASK_COMM_LEN bytes. Fails with -ESRCH if the file does not exist. */
+
+        buffer = new(char, TASK_COMM_LEN);
+        if (!buffer)
+                return -ENOMEM;
+
+        path = procfs_file_alloca(pid, "comm");
+
+        r = read_one_line_file(path, &raw);
+        if (r < 0)
+                return r == -ENOENT ? -ESRCH : r;
+
+        /* Escape unprintable characters, just in case, without growing beyond the buffer size */
+        cellescape(buffer, TASK_COMM_LEN, raw);
+
+        *ret = TAKE_PTR(buffer);
+        return 0;
+}
+
+int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) {
+        _cleanup_fclose_ FILE *f = NULL;
+        bool space = false;
+        char *k;
+        _cleanup_free_ char *ans = NULL;
+        const char *p;
+        int c;
+
+        assert(line);
+        assert(pid >= 0);
+
+        /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
+         * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
+         * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
+         * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
+         * command line that resolves to the empty string will return the "comm" name of the process instead,
+         * enclosed in square brackets.
+         *
+         * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
+         * comm_fallback is false). Returns 0 and sets *line otherwise. */
+
+        p = procfs_file_alloca(pid, "cmdline");
+
+        f = fopen(p, "re");
+        if (!f) {
+                if (errno == ENOENT)
+                        return -ESRCH;
+                return -errno;
+        }
+
+        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+        if (max_length == 0) {
+                /* This is supposed to be a safety guard against runaway command lines. */
+                long l = sysconf(_SC_ARG_MAX);
+                assert(l > 0);
+                max_length = l;
+        }
+
+        if (max_length == 1) {
+
+                /* If there's only room for one byte, return the empty string */
+                ans = new0(char, 1);
+                if (!ans)
+                        return -ENOMEM;
+
+                *line = TAKE_PTR(ans);
+                return 0;
+
+        } else {
+                bool dotdotdot = false;
+                size_t left;
+
+                ans = new(char, max_length);
+                if (!ans)
+                        return -ENOMEM;
+
+                /* k is the write position, left the number of bytes still free, including the one for the
+                 * trailing NUL */
+                k = ans;
+                left = max_length;
+                while ((c = getc(f)) != EOF) {
+
+                        if (isprint(c)) {
+
+                                /* A pending separator is only written out once another printable character
+                                 * follows, so that the result never ends in a space */
+                                if (space) {
+                                        if (left <= 2) {
+                                                dotdotdot = true;
+                                                break;
+                                        }
+
+                                        *(k++) = ' ';
+                                        left--;
+                                        space = false;
+                                }
+
+                                if (left <= 1) {
+                                        dotdotdot = true;
+                                        break;
+                                }
+
+                                *(k++) = (char) c;
+                                left--;
+                        } else if (k > ans)
+                                /* Unprintable characters (including the NUL bytes separating the arguments)
+                                 * become a single space, except at the very beginning */
+                                space = true;
+                }
+
+                if (dotdotdot) {
+                        if (max_length <= 4) {
+                                /* Too short for text plus ellipsis: fill with as many dots as fit */
+                                k = ans;
+                                left = max_length;
+                        } else {
+                                k = ans + max_length - 4;
+                                left = 4;
+
+                                /* Eat up final spaces. The cast matters: <ctype.h> functions must not be
+                                 * passed a negative value, and plain char may be signed. */
+                                while (k > ans && isspace((unsigned char) k[-1])) {
+                                        k--;
+                                        left++;
+                                }
+                        }
+
+                        strncpy(k, "...", left-1);
+                        k[left-1] = 0;
+                } else
+                        *k = 0;
+        }
+
+        /* Kernel threads have no argv[] */
+        if (isempty(ans)) {
+                _cleanup_free_ char *t = NULL;
+                int h;
+
+                ans = mfree(ans);
+
+                if (!comm_fallback)
+                        return -ENOENT;
+
+                h = get_process_comm(pid, &t);
+                if (h < 0)
+                        return h;
+
+                size_t l = strlen(t);
+
+                if (l + 3 <= max_length) {
+                        /* "[" + comm + "]" + NUL fits in full */
+                        ans = strjoin("[", t, "]");
+                        if (!ans)
+                                return -ENOMEM;
+
+                } else if (max_length <= 6) {
+                        /* No room for any part of the name: return as much of "[...]" as fits */
+                        ans = new(char, max_length);
+                        if (!ans)
+                                return -ENOMEM;
+
+                        memcpy(ans, "[...]", max_length-1);
+                        ans[max_length-1] = 0;
+                } else {
+                        /* Shorten the name so that "[" + name + "...]" + NUL fits into max_length bytes */
+                        t[max_length - 6] = 0;
+
+                        /* Chop off final spaces */
+                        delete_trailing_chars(t, WHITESPACE);
+
+                        ans = strjoin("[", t, "...]");
+                        if (!ans)
+                                return -ENOMEM;
+                }
+
+                *line = TAKE_PTR(ans);
+                return 0;
+        }
+
+        /* Give back the unused part of the buffer. On failure ans is still owned by us and freed by the
+         * cleanup handler; on success ownership moves to k, hence reset ans without freeing it. */
+        k = realloc(ans, strlen(ans) + 1);
+        if (!k)
+                return -ENOMEM;
+
+        ans = NULL;
+        *line = k;
+
+        return 0;
+}
+
+int rename_process(const char name[]) {
+ static size_t mm_size = 0; /* size of the mapping in mm */
+ static char *mm = NULL; /* anonymous mapping handed to the kernel as our argument area; kept across calls for reuse */
+ bool truncated = false;
+ size_t l;
+
+ /* This is like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
+ * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
+ * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCE it is unbounded;
+ * to the third one 7 (i.e. the length of "systemd"). If you pass a longer string it will likely be
+ * truncated.
+ *
+ * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. Returns -EINVAL for an
+ * empty name and -EPERM when not called from the main thread. */
+
+ if (isempty(name))
+ return -EINVAL; /* let's not confuse users unnecessarily with an empty name */
+
+ if (!is_main_thread())
+ return -EPERM; /* Let's not allow setting the process name from other threads than the main one, as we
+ * cache things without locking, and we make assumptions that PR_SET_NAME sets the
+ * process name that isn't correct on any other threads */
+
+ l = strlen(name);
+
+ /* First step, change the comm field. The main thread's comm is identical to the process comm. This means we
+ * can use PR_SET_NAME, which sets the thread name for the calling thread. */
+ if (prctl(PR_SET_NAME, name) < 0)
+ log_debug_errno(errno, "PR_SET_NAME failed: %m");
+ if (l >= TASK_COMM_LEN) /* Linux process names can be 15 chars at max */
+ truncated = true;
+
+ /* Second step, change glibc's ID of the process name. */
+ if (program_invocation_name) {
+ size_t k;
+
+ k = strlen(program_invocation_name);
+ strncpy(program_invocation_name, name, k); /* overwrite in place: at most k bytes are written, so the old terminator stays put */
+ if (l > k)
+ truncated = true;
+ }
+
+ /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
+ * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
+ * the end. This is the best option for changing /proc/self/cmdline. */
+
+ /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
+ * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
+ * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
+ * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
+ * mmap() is not. */
+ if (geteuid() != 0)
+ log_debug("Skipping PR_SET_MM, as we don't have privileges.");
+ else if (mm_size < l+1) { /* no mapping yet, or the existing one is too small for name plus NUL */
+ size_t nn_size;
+ char *nn;
+
+ nn_size = PAGE_ALIGN(l+1);
+ nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (nn == MAP_FAILED) {
+ log_debug_errno(errno, "mmap() failed: %m");
+ goto use_saved_argv;
+ }
+
+ strncpy(nn, name, nn_size);
+
+ /* Now, let's tell the kernel about this new memory */
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
+ /* HACK: prctl() API is kind of dumb on this point. The existing end address may already be
+ * below the desired start address, in which case the kernel may have kicked this back due
+ * to a range-check failure (see linux/kernel/sys.c:validate_prctl_map() to see this in
+ * action). The proper solution would be to have a prctl() API that could set both start+end
+ * simultaneously, or at least let us query the existing address to anticipate this condition
+ * and respond accordingly. For now, we can only guess at the cause of this failure and try
+ * a workaround--which will briefly expand the arg space to something potentially huge before
+ * resizing it to what we want. */
+ log_debug_errno(errno, "PR_SET_MM_ARG_START failed, attempting PR_SET_MM_ARG_END hack: %m");
+
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) {
+ log_debug_errno(errno, "PR_SET_MM_ARG_END hack failed, proceeding without: %m");
+ (void) munmap(nn, nn_size);
+ goto use_saved_argv;
+ }
+
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
+ log_debug_errno(errno, "PR_SET_MM_ARG_START still failed, proceeding without: %m");
+ goto use_saved_argv; /* NOTE(review): nn is not unmapped on this path — presumably because the end pointer set just above already points into it; confirm */
+ }
+ } else {
+ /* And update the end pointer to the new end, too. If this fails, we don't really know what
+ * to do, it's pretty unlikely that we can rollback, hence we'll just accept the failure,
+ * and continue. */
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
+ log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
+ }
+
+ if (mm)
+ (void) munmap(mm, mm_size); /* the previous mapping is no longer referenced by the kernel */
+
+ mm = nn;
+ mm_size = nn_size;
+ } else { /* the existing mapping is large enough: rewrite it in place */
+ strncpy(mm, name, mm_size);
+
+ /* Update the end pointer, continuing regardless of any failure. */
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
+ log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
+ }
+
+use_saved_argv:
+ /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
+ * it still looks here */
+
+ if (saved_argc > 0) {
+ int i;
+
+ if (saved_argv[0]) {
+ size_t k;
+
+ k = strlen(saved_argv[0]);
+ strncpy(saved_argv[0], name, k); /* same in-place overwrite as for program_invocation_name above */
+ if (l > k)
+ truncated = true;
+ }
+
+ for (i = 1; i < saved_argc; i++) { /* blank out all further arguments, keeping their terminators */
+ if (!saved_argv[i])
+ break;
+
+ memzero(saved_argv[i], strlen(saved_argv[i]));
+ }
+ }
+
+ return !truncated;
+}
+
+int is_kernel_thread(pid_t pid) {
+        _cleanup_free_ char *stat_line = NULL;
+        unsigned long long flags;
+        const char *path;
+        size_t len = 0;
+        char *cursor;
+        int r;
+
+        /* Returns > 0 if the PF_KTHREAD flag is set for the process, 0 if not, and a negative errno
+         * on failure (-ESRCH if /proc/<pid>/stat does not exist). */
+
+        if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
+                return 0;
+        if (!pid_is_valid(pid))
+                return -EINVAL;
+
+        path = procfs_file_alloca(pid, "stat");
+        r = read_one_line_file(path, &stat_line);
+        if (r < 0)
+                return r == -ENOENT ? -ESRCH : r;
+
+        /* Skip past the comm field */
+        cursor = strrchr(stat_line, ')');
+        if (!cursor)
+                return -EINVAL;
+        cursor++;
+
+        /* Walk over seven whitespace-separated fields: the first six are skipped, the seventh is
+         * the flags field, which we stop in front of with its length in len. */
+        for (unsigned field = 0; field < 7; field++) {
+                len = strspn(cursor, WHITESPACE);
+                if (len < 1)
+                        return -EINVAL;
+                cursor += len;
+
+                len = strcspn(cursor, WHITESPACE);
+                if (len < 1)
+                        return -EINVAL;
+
+                if (field == 6)
+                        break;
+
+                cursor += len;
+        }
+
+        /* Truncate the rest */
+        cursor[len] = 0;
+
+        r = safe_atollu(cursor, &flags);
+        if (r < 0)
+                return r;
+
+        return !!(flags & PF_KTHREAD);
+}
+
+int get_process_capeff(pid_t pid, char **capeff) {
+        const char *path;
+        int r;
+
+        assert(capeff);
+        assert(pid >= 0);
+
+        /* Returns the "CapEff" field of /proc/<pid>/status as a string; -ENOENT from the lookup is
+         * reported as -ESRCH. */
+        path = procfs_file_alloca(pid, "status");
+
+        r = get_proc_field(path, "CapEff", WHITESPACE, capeff);
+
+        return r == -ENOENT ? -ESRCH : r;
+}
+
+static int get_process_link_contents(const char *proc_file, char **name) {
+        int r;
+
+        assert(proc_file);
+        assert(name);
+
+        /* Reads the target of a symlink in /proc into *name. Any non-negative result of
+         * readlink_malloc() is reported as 0, and -ENOENT is turned into -ESRCH. */
+        r = readlink_malloc(proc_file, name);
+        if (r >= 0)
+                return 0;
+
+        return r == -ENOENT ? -ESRCH : r;
+}
+
+int get_process_exe(pid_t pid, char **name) {
+        const char *link;
+        char *marker;
+        int r;
+
+        assert(pid >= 0);
+
+        /* Resolves /proc/<pid>/exe into *name. */
+        link = procfs_file_alloca(pid, "exe");
+
+        r = get_process_link_contents(link, name);
+        if (r < 0)
+                return r;
+
+        /* Cut off a trailing " (deleted)" suffix, if the link target carries one */
+        marker = endswith(*name, " (deleted)");
+        if (marker)
+                *marker = '\0';
+
+        return 0;
+}
+
+static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
+        _cleanup_fclose_ FILE *status = NULL;
+        const char *path;
+
+        assert(field);
+        assert(uid);
+
+        /* Looks for the first line of /proc/<pid>/status that starts with field and parses the first
+         * word after it with parse_uid(). Returns -ESRCH if the file does not exist and -EIO if no
+         * line matches. */
+
+        if (pid < 0)
+                return -EINVAL;
+
+        path = procfs_file_alloca(pid, "status");
+        status = fopen(path, "re");
+        if (!status)
+                return errno == ENOENT ? -ESRCH : -errno;
+
+        (void) __fsetlocking(status, FSETLOCKING_BYCALLER);
+
+        for (;;) {
+                _cleanup_free_ char *line = NULL;
+                char *l;
+                int r;
+
+                r = read_line(status, LONG_LINE_MAX, &line);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        return -EIO; /* end of file, nothing found */
+
+                l = strstrip(line);
+
+                if (!startswith(l, field))
+                        continue;
+
+                /* Step over the field name and the whitespace after it, then cut off everything that
+                 * follows the first value */
+                l += strlen(field);
+                l += strspn(l, WHITESPACE);
+
+                l[strcspn(l, WHITESPACE)] = 0;
+
+                return parse_uid(l, uid);
+        }
+}
+
+int get_process_uid(pid_t pid, uid_t *uid) {
+
+        /* Other processes are looked up through the "Uid:" line of their status file in /proc;
+         * for ourselves (pid 0 or our own pid) getuid() is used instead. */
+        if (pid != 0 && pid != getpid_cached())
+                return get_process_id(pid, "Uid:", uid);
+
+        *uid = getuid();
+        return 0;
+}
+
+int get_process_gid(pid_t pid, gid_t *gid) {
+
+        /* get_process_id() takes a uid_t pointer, so make sure both types have the same size */
+        assert_cc(sizeof(uid_t) == sizeof(gid_t));
+
+        /* Other processes are looked up through the "Gid:" line of their status file in /proc;
+         * for ourselves (pid 0 or our own pid) getgid() is used instead. */
+        if (pid != 0 && pid != getpid_cached())
+                return get_process_id(pid, "Gid:", gid);
+
+        *gid = getgid();
+        return 0;
+}
+
+int get_process_cwd(pid_t pid, char **cwd) {
+        const char *link;
+
+        assert(pid >= 0);
+
+        /* Resolves the /proc/<pid>/cwd symlink into *cwd; -ESRCH if it does not exist. */
+        link = procfs_file_alloca(pid, "cwd");
+
+        return get_process_link_contents(link, cwd);
+}
+
+int get_process_root(pid_t pid, char **root) {
+        const char *link;
+
+        assert(pid >= 0);
+
+        /* Resolves the /proc/<pid>/root symlink into *root; -ESRCH if it does not exist. */
+        link = procfs_file_alloca(pid, "root");
+
+        return get_process_link_contents(link, root);
+}
+
+#define ENVIRONMENT_BLOCK_MAX (5U*1024U*1024U)
+
+int get_process_environ(pid_t pid, char **env) {
+        _cleanup_fclose_ FILE *stream = NULL;
+        _cleanup_free_ char *buffer = NULL;
+        size_t allocated = 0, used = 0;
+        const char *path;
+
+        assert(pid >= 0);
+        assert(env);
+
+        /* Returns the environment block of a process as a single string: each NUL byte of the block
+         * becomes a newline, and every other byte is passed through cescape_char(). Fails with
+         * -ESRCH if /proc/<pid>/environ does not exist, and with -ENOBUFS once the result reaches
+         * ENVIRONMENT_BLOCK_MAX bytes. */
+
+        path = procfs_file_alloca(pid, "environ");
+
+        stream = fopen(path, "re");
+        if (!stream)
+                return errno == ENOENT ? -ESRCH : -errno;
+
+        (void) __fsetlocking(stream, FSETLOCKING_BYCALLER);
+
+        for (;;) {
+                char c;
+                int r;
+
+                if (used >= ENVIRONMENT_BLOCK_MAX)
+                        return -ENOBUFS;
+
+                /* Reserve room for what one byte of input may add, plus the trailing NUL */
+                if (!GREEDY_REALLOC(buffer, allocated, used + 5))
+                        return -ENOMEM;
+
+                r = safe_fgetc(stream, &c);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                if (c == '\0') {
+                        buffer[used++] = '\n';
+                        continue;
+                }
+
+                used += cescape_char(c, buffer + used);
+        }
+
+        buffer[used] = '\0';
+        *env = TAKE_PTR(buffer);
+
+        return 0;
+}
+
+int get_process_ppid(pid_t pid, pid_t *_ppid) {
+        _cleanup_free_ char *stat_line = NULL;
+        long unsigned parent;
+        const char *path, *paren;
+        int r;
+
+        assert(pid >= 0);
+        assert(_ppid);
+
+        /* Determines the parent PID of a process. For ourselves (pid 0 or our own pid) getppid() is
+         * used; for everyone else /proc/<pid>/stat is parsed. Returns -ESRCH if that file does not
+         * exist, -EIO if it cannot be parsed, and -ERANGE if the value does not fit into pid_t. */
+
+        if (pid == 0 || pid == getpid_cached()) {
+                *_ppid = getppid();
+                return 0;
+        }
+
+        path = procfs_file_alloca(pid, "stat");
+        r = read_one_line_file(path, &stat_line);
+        if (r < 0)
+                return r == -ENOENT ? -ESRCH : r;
+
+        /* The pid and comm fields come first. The comm field is enclosed in () but does not escape
+         * any () in its value, hence skip everything up to the last ")" on the line. */
+        paren = strrchr(stat_line, ')');
+        if (!paren)
+                return -EIO;
+
+        /* What follows is the state character and then the ppid */
+        if (sscanf(paren + 1, " %*c %lu ", &parent) != 1)
+                return -EIO;
+
+        if ((long unsigned) (pid_t) parent != parent)
+                return -ERANGE;
+
+        *_ppid = (pid_t) parent;
+
+        return 0;
+}
+
+int wait_for_terminate(pid_t pid, siginfo_t *status) {
+        siginfo_t scratch;
+        int r;
+
+        assert(pid >= 1);
+
+        /* Waits until the specified child has exited, restarting the wait whenever it is interrupted
+         * by a signal. status may be NULL if the caller has no use for the result. Returns 0 on
+         * success and a negative errno otherwise. */
+
+        if (!status)
+                status = &scratch;
+
+        do {
+                zero(*status);
+                r = waitid(P_PID, pid, status, WEXITED);
+        } while (r < 0 && errno == EINTR);
+
+        if (r < 0)
+                return negative_errno();
+
+        return 0;
+}
+
+/*
+ * Waits for the child process 'pid' to terminate and evaluates how it ended.
+ *
+ * Return values:
+ * < 0 : wait_for_terminate() failed to get the state of the
+ * process (its error is returned), or the process was terminated
+ * by a signal or ended for an unknown reason (-EPROTO).
+ * >=0 : The process terminated normally, and its exit code is
+ * returned.
+ *
+ * That is, success is indicated by a return value of zero, and an
+ * error is indicated by a non-zero value.
+ *
+ * Logging: failure to wait, termination by signal and termination for an
+ * unknown reason are logged at LOG_ERR if WAIT_LOG_ABNORMAL is set in
+ * 'flags', and at LOG_DEBUG otherwise. A non-zero exit status is logged at
+ * LOG_ERR if WAIT_LOG_NON_ZERO_EXIT_STATUS is set, and at LOG_DEBUG
+ * otherwise. A zero exit status is logged at debug level.
+ *
+ * 'name' is only used for log messages. If it is NULL the name is looked
+ * up via get_process_comm(); if that fails too, strna() is used as a
+ * placeholder in every message.
+ */
+int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags) {
+        _cleanup_free_ char *buffer = NULL;
+        siginfo_t status;
+        int r, prio;
+
+        assert(pid > 1);
+
+        if (!name) {
+                r = get_process_comm(pid, &buffer);
+                if (r < 0)
+                        log_debug_errno(r, "Failed to acquire process name of " PID_FMT ", ignoring: %m", pid);
+                else
+                        name = buffer;
+        }
+
+        prio = flags & WAIT_LOG_ABNORMAL ? LOG_ERR : LOG_DEBUG;
+
+        r = wait_for_terminate(pid, &status);
+        if (r < 0)
+                return log_full_errno(prio, r, "Failed to wait for %s: %m", strna(name));
+
+        if (status.si_code == CLD_EXITED) {
+                if (status.si_status != EXIT_SUCCESS)
+                        log_full(flags & WAIT_LOG_NON_ZERO_EXIT_STATUS ? LOG_ERR : LOG_DEBUG,
+                                 "%s failed with exit status %i.", strna(name), status.si_status);
+                else
+                        /* name may still be NULL here if the comm lookup above failed, hence go through
+                         * strna() like all other messages do, instead of handing NULL to %s. */
+                        log_debug("%s succeeded.", strna(name));
+
+                return status.si_status;
+
+        } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
+
+                log_full(prio, "%s terminated by signal %s.", strna(name), signal_to_string(status.si_status));
+                return -EPROTO;
+        }
+
+        log_full(prio, "%s failed due to unknown reason.", strna(name));
+        return -EPROTO;
+}
+
+/*
+ * Waits for the child process 'pid' to terminate, but for at most 'timeout' microseconds.
+ *
+ * Return values:
+ *
+ * 0 : The process exited normally with exit status 0.
+ *
+ * -ETIMEDOUT : sigtimedwait() ran into the timeout without the child having exited.
+ *
+ * -EPROTO : The process exited with a non-zero exit status, was terminated abnormally, or the deadline had
+ * already passed when it was (re-)checked at the top of the loop.
+ *
+ * other < 0 : An unexpected error returned by sigtimedwait().
+ *
+ * This call assumes SIGCHLD has been blocked already, in particular before the child to wait for has been forked off
+ * to remain entirely race-free.
+ */
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
+        const usec_t forever = (usec_t) -1; /* largest usec_t value, used below as "no deadline" marker */
+        usec_t start, until;
+        sigset_t mask;
+        int r;
+
+        assert_se(sigemptyset(&mask) == 0);
+        assert_se(sigaddset(&mask, SIGCHLD) == 0);
+
+        /* Drop into a sigtimedwait()-based timeout, waiting for the pid to exit. The deadline is computed
+         * with a saturating addition: a plain "now + timeout" wraps around for very large (or infinite)
+         * timeouts, which would make the deadline appear to have passed already and abort the wait
+         * immediately. */
+        start = now(CLOCK_MONOTONIC);
+        until = timeout > forever - start ? forever : start + timeout;
+
+        for (;;) {
+                usec_t n;
+                siginfo_t status = {};
+                struct timespec ts;
+
+                n = now(CLOCK_MONOTONIC);
+                if (n >= until)
+                        break;
+
+                /* Without a deadline pass a NULL timeout, which on Linux makes sigtimedwait() block until a
+                 * signal arrives, like sigwaitinfo() does. */
+                r = sigtimedwait(&mask, NULL, until == forever ? NULL : timespec_store(&ts, until - n)) < 0 ? -errno : 0;
+                /* Assuming we woke due to the child exiting. */
+                if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
+                        if (status.si_pid == pid) {
+                                /* This is the correct child.*/
+                                if (status.si_code == CLD_EXITED)
+                                        return (status.si_status == 0) ? 0 : -EPROTO;
+                                else
+                                        return -EPROTO;
+                        }
+                }
+                /* Not the child, check for errors and proceed appropriately */
+                if (r < 0) {
+                        switch (r) {
+                        case -EAGAIN:
+                                /* Timed out, child is likely hung. */
+                                return -ETIMEDOUT;
+                        case -EINTR:
+                                /* Received a different signal and should retry */
+                                continue;
+                        default:
+                                /* Return any unexpected errors */
+                                return r;
+                        }
+                }
+        }
+
+        return -EPROTO;
+}
+
+/* Sends SIGKILL to 'pid' and, if the signal could be delivered, waits for the process to terminate. Errors
+ * of either step are ignored. */
+void sigkill_wait(pid_t pid) {
+        assert(pid > 1);
+
+        if (kill(pid, SIGKILL) < 0)
+                return;
+
+        (void) wait_for_terminate(pid, NULL);
+}
+
+/* Pointer-taking variant of sigkill_wait(): does nothing if 'pid' is NULL or points to a PID <= 1. errno is
+ * guarded by PROTECT_ERRNO for the duration of the call. */
+void sigkill_waitp(pid_t *pid) {
+        PROTECT_ERRNO;
+
+        if (pid && *pid > 1)
+                sigkill_wait(*pid);
+}
+
+/* Sends SIGTERM (followed by SIGCONT, see kill_and_sigcont()) to 'pid' and, if that worked, waits for the
+ * process to terminate. Errors of either step are ignored. */
+void sigterm_wait(pid_t pid) {
+        assert(pid > 1);
+
+        if (kill_and_sigcont(pid, SIGTERM) < 0)
+                return;
+
+        (void) wait_for_terminate(pid, NULL);
+}
+
+/* Sends 'sig' to 'pid'. Returns 0 on success and -errno if kill() failed.
+ *
+ * On success SIGCONT is sent as well, so that a suspended process gets the chance to act on the signal. This
+ * is skipped if SIGCONT was what we just sent, or for SIGKILL, which isn't affected by the process being
+ * suspended anyway. A failure to send the extra SIGCONT is ignored. */
+int kill_and_sigcont(pid_t pid, int sig) {
+
+        if (kill(pid, sig) < 0)
+                return -errno;
+
+        if (sig != SIGCONT && sig != SIGKILL)
+                (void) kill(pid, SIGCONT);
+
+        return 0;
+}
+
+/* Looks up the environment variable 'field' in the environment of process 'pid'.
+ *
+ * For pid == 0 or our own PID getenv() is used, otherwise the "environ" file of the process in procfs is
+ * scanned entry by entry.
+ *
+ * Returns 1 and stores a newly allocated copy of the value in *ret if the variable is set, returns 0 and
+ * stores NULL in *ret if it is not. Errors: -ENOMEM, -EINVAL (invalid PID), -ESRCH (procfs file missing),
+ * -ENOBUFS (more than ENVIRONMENT_BLOCK_MAX bytes scanned without a match), or whatever fopen() or
+ * read_nul_string() report. */
+int getenv_for_pid(pid_t pid, const char *field, char **ret) {
+        _cleanup_fclose_ FILE *f = NULL;
+        size_t field_len, consumed = 0;
+        const char *path;
+        char *copy;
+        int r;
+
+        assert(pid >= 0);
+        assert(field);
+        assert(ret);
+
+        if (pid == 0 || pid == getpid_cached()) {
+                const char *e = getenv(field);
+
+                if (!e) {
+                        *ret = NULL;
+                        return 0;
+                }
+
+                copy = strdup(e);
+                if (!copy)
+                        return -ENOMEM;
+
+                *ret = copy;
+                return 1;
+        }
+
+        if (!pid_is_valid(pid))
+                return -EINVAL;
+
+        path = procfs_file_alloca(pid, "environ");
+
+        f = fopen(path, "re");
+        if (!f)
+                return errno == ENOENT ? -ESRCH : -errno;
+
+        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+        field_len = strlen(field);
+
+        /* Give up searching eventually, once we have looked at too much data */
+        while (consumed <= ENVIRONMENT_BLOCK_MAX) {
+                _cleanup_free_ char *entry = NULL;
+
+                r = read_nul_string(f, LONG_LINE_MAX, &entry);
+                if (r < 0)
+                        return r;
+                if (r == 0) { /* EOF, the variable is not set */
+                        *ret = NULL;
+                        return 0;
+                }
+
+                consumed += r;
+
+                /* We are looking for an entry of the form "<field>=<value>" */
+                if (!strneq(entry, field, field_len) || entry[field_len] != '=')
+                        continue;
+
+                copy = strdup(entry + field_len + 1);
+                if (!copy)
+                        return -ENOMEM;
+
+                *ret = copy;
+                return 1;
+        }
+
+        return -ENOBUFS;
+}
+
+/* Checks whether a PID is still valid at all, which includes zombies that have not been waited for yet.
+ *
+ * Negative PIDs are never valid. PID 0, PID 1 and our own PID are always considered valid: if we or PID 1
+ * were dead and had been waited for, this code would not be running. Everything else is probed with signal
+ * 0; any kill() error other than ESRCH is taken as "the process exists". */
+bool pid_is_unwaited(pid_t pid) {
+
+        if (pid < 0)
+                return false;
+
+        if (pid <= 1 || pid == getpid_cached())
+                return true;
+
+        return kill(pid, 0) >= 0 || errno != ESRCH;
+}
+
+/* Checks whether a PID is still valid and not a zombie.
+ *
+ * Negative PIDs are never alive. PID 0, PID 1 and our own PID are always considered alive: if we or PID 1
+ * were a zombie, this code would not be running. For everything else get_process_state() is consulted; only
+ * the results -ESRCH and 'Z' are treated as "not alive", any other result (other errors included) counts as
+ * alive. */
+bool pid_is_alive(pid_t pid) {
+        int state;
+
+        if (pid < 0)
+                return false;
+
+        if (pid <= 1 || pid == getpid_cached())
+                return true;
+
+        state = get_process_state(pid);
+
+        return state != -ESRCH && state != 'Z';
+}
+
+/* Compares the procfs "root" entry of 'pid' against "/proc/1/root" by means of files_same() and returns its
+ * result. Negative PIDs yield false; pid == 0 and our own PID yield true without any comparison. */
+int pid_from_same_root_fs(pid_t pid) {
+        const char *their_root;
+
+        if (pid < 0)
+                return false;
+
+        if (pid == 0 || pid == getpid_cached())
+                return true;
+
+        their_root = procfs_file_alloca(pid, "root");
+
+        return files_same(their_root, "/proc/1/root", 0);
+}
+
+/* Returns true if the calling thread is the main thread of the process, i.e. if its thread ID equals the
+ * process ID. The answer is computed once per thread and remembered in a thread-local variable. */
+bool is_main_thread(void) {
+        /* 0 → not determined yet, 1 → main thread, -1 → any other thread */
+        static thread_local int verdict = 0;
+
+        if (_unlikely_(verdict == 0)) {
+                if (getpid_cached() == gettid())
+                        verdict = 1;
+                else
+                        verdict = -1;
+        }
+
+        return verdict > 0;
+}
+
+/* Parks the calling process forever: closes the log and all file descriptors, syncs, then keeps reaping
+ * children until waitid() fails with something other than EINTR, and finally sleeps in pause() for good.
+ * Never returns. */
+_noreturn_ void freeze(void) {
+
+        log_close();
+
+        /* Make sure nobody waits for us on a socket anymore */
+        close_all_fds(NULL, 0);
+
+        sync();
+
+        /* Let's not freeze right away, but keep reaping zombies. */
+        for (;;) {
+                siginfo_t si = {};
+
+                if (waitid(P_ALL, 0, &si, WEXITED) >= 0)
+                        continue;
+
+                if (errno == EINTR)
+                        continue;
+
+                /* waitid() failed with an unexpected error, things are really borked. Freeze now! */
+                break;
+        }
+
+        while (true)
+                pause();
+}
+
+/* Returns true if 'oa' lies within the inclusive range OOM_SCORE_ADJ_MIN…OOM_SCORE_ADJ_MAX. */
+bool oom_score_adjust_is_valid(int oa) {
+        if (oa < OOM_SCORE_ADJ_MIN)
+                return false;
+
+        return oa <= OOM_SCORE_ADJ_MAX;
+}
+
+/* Parses a personality specifier. We use our own identifiers that indicate specific ABIs (i.e. architecture
+ * names), rather than just hints regarding the register size, since we want to keep things open for multiple
+ * locally supported ABIs for the same register size.
+ *
+ * Returns PER_LINUX for the native architecture, PER_LINUX32 for SECONDARY_ARCHITECTURE (if the build defines
+ * one), and PERSONALITY_INVALID for NULL, unknown names and all other architectures. */
+unsigned long personality_from_string(const char *p) {
+        int arch;
+
+        if (!p)
+                return PERSONALITY_INVALID;
+
+        arch = architecture_from_string(p);
+        if (arch < 0)
+                return PERSONALITY_INVALID;
+
+        if (arch == native_architecture())
+                return PER_LINUX;
+
+#ifdef SECONDARY_ARCHITECTURE
+        if (arch == SECONDARY_ARCHITECTURE)
+                return PER_LINUX32;
+#endif
+
+        return PERSONALITY_INVALID;
+}
+
+/* Inverse of personality_from_string(): maps PER_LINUX to the name of the native architecture and
+ * PER_LINUX32 to the name of SECONDARY_ARCHITECTURE (if the build defines one). Returns NULL for every other
+ * personality, and whenever the resulting architecture value is negative. */
+const char* personality_to_string(unsigned long p) {
+        int arch;
+
+        switch (p) {
+
+        case PER_LINUX:
+                arch = native_architecture();
+                break;
+
+#ifdef SECONDARY_ARCHITECTURE
+        case PER_LINUX32:
+                arch = SECONDARY_ARCHITECTURE;
+                break;
+#endif
+
+        default:
+                arch = _ARCHITECTURE_INVALID;
+                break;
+        }
+
+        return arch < 0 ? NULL : architecture_to_string(arch);
+}
+
+/* Wrapper around personality() that reports failures consistently.
+ *
+ * So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
+ * and in others as negative return value containing an errno-like value. This wrapper uses errno if it is
+ * set, and the return value otherwise, and makes sure that on failure both errno and the return value
+ * indicate the same issue, so that we are definitely on the safe side.
+ *
+ * Returns the non-negative result of personality() on success, and a negative errno-style value on failure.
+ *
+ * See https://github.com/systemd/systemd/issues/6737 */
+int safe_personality(unsigned long p) {
+        int rc;
+
+        errno = 0;
+        rc = personality(p);
+        if (rc >= 0)
+                return rc;
+
+        if (errno == 0)
+                errno = -rc; /* failure came in via the return value only, mirror it into errno */
+        else
+                rc = -errno; /* failure came in via errno, mirror it into the return value */
+
+        return rc;
+}
+
+/* Queries the current personality and stores a simplified version of it in *ret. Returns 0 on success, and
+ * the negative error from safe_personality() if the personality cannot be determined (in which case *ret is
+ * left untouched).
+ *
+ * This function is a bit opinionated: it ignores all the finer-grained bits and exotic personalities, and
+ * only distinguishes the two most relevant ones. If the lower 16 bits equal PER_LINUX32 that is what is
+ * reported, everything else is reported as PER_LINUX. */
+int opinionated_personality(unsigned long *ret) {
+        int current;
+
+        current = safe_personality(PERSONALITY_INVALID);
+        if (current < 0)
+                return current;
+
+        *ret = ((unsigned long) current & 0xffff) == PER_LINUX32 ? PER_LINUX32 : PER_LINUX;
+
+        return 0;
+}
+
+/* If built with valgrind support, and running as PID 1 under valgrind, forks off a helper process that
+ * exits immediately with EXIT_SUCCESS and waits for it. In all other cases this is a NOP. */
+void valgrind_summary_hack(void) {
+#if HAVE_VALGRIND_VALGRIND_H
+        pid_t helper;
+
+        if (getpid_cached() != 1 || !(RUNNING_ON_VALGRIND))
+                return;
+
+        helper = raw_clone(SIGCHLD);
+        if (helper < 0) {
+                log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
+                return;
+        }
+
+        if (helper == 0)
+                exit(EXIT_SUCCESS);
+
+        log_info("Spawned valgrind helper as PID "PID_FMT".", helper);
+        (void) wait_for_terminate(helper, NULL);
+#endif
+}
+
+/* Three-way comparison of two PIDs via CMP(), suitable for usage in qsort() */
+int pid_compare_func(const pid_t *a, const pid_t *b) {
+        pid_t left = *a, right = *b;
+
+        return CMP(left, right);
+}
+
+/* Parses the string 's' as an I/O priority. On success stores the value in *ret and returns 0. Returns the
+ * error of safe_atoi() if 's' is not an integer, and -EINVAL if the integer is rejected by
+ * ioprio_priority_is_valid(). *ret is only written on success. */
+int ioprio_parse_priority(const char *s, int *ret) {
+        int parsed, r;
+
+        assert(s);
+        assert(ret);
+
+        r = safe_atoi(s, &parsed);
+        if (r < 0)
+                return r;
+
+        if (ioprio_priority_is_valid(parsed)) {
+                *ret = parsed;
+                return 0;
+        }
+
+        return -EINVAL;
+}
+
+/* The cached PID, possible values:
+ *
+ * == UNSET [0] → cache not initialized yet
+ * == BUSY [-1] → some thread is initializing it at the moment
+ * any other → the cached PID
+ *
+ * The two marker values can double as state indicators because neither 0 nor -1 is what getpid_cached()
+ * stores as a real PID.
+ */
+
+#define CACHED_PID_UNSET ((pid_t) 0)
+#define CACHED_PID_BUSY ((pid_t) -1)
+
+/* Backing store of getpid_cached(); starts out in the UNSET state. */
+static pid_t cached_pid = CACHED_PID_UNSET;
+
+/* Drops the cached PID, so that the next call to getpid_cached() queries the PID afresh. getpid_cached()
+ * registers this function as fork handler for the child side. */
+void reset_cached_pid(void) {
+ /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */
+ cached_pid = CACHED_PID_UNSET;
+}
+
+/* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
+ * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
+ * libpthread, as it is part of glibc anyway. */
+extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void *dso_handle);
+extern void* __dso_handle _weak_;
+
+/* Returns the PID of the calling process, served from cached_pid whenever the cache is initialized. Never
+ * fails: whenever the cache cannot be used the result of raw_getpid() is returned instead. */
+pid_t getpid_cached(void) {
+ /* Whether reset_cached_pid() has been registered as fork handler already */
+ static bool installed = false;
+ pid_t current_value;
+
+ /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
+ * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
+ * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
+ * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
+ *
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
+ * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
+ */
+
+ /* Atomically switch the cache from UNSET to BUSY. The value returned is the one found in the cache before
+ * the swap: if it is UNSET we won and are in charge of initializing the cache, in all other cases the
+ * cache is left untouched. */
+ current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);
+
+ switch (current_value) {
+
+ case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
+ pid_t new_pid;
+
+ new_pid = raw_getpid();
+
+ if (!installed) {
+ /* __register_atfork() either returns 0 or -ENOMEM, in its glibc implementation. Since it's
+ * only half-documented (glibc doesn't document it but LSB does — though only superficially)
+ * we'll check for errors only in the most generic fashion possible. */
+
+ if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
+ /* OOM? Let's try again later */
+ /* Without the fork handler a cached value would go stale in forked children, hence
+ * leave the cache in the UNSET state and return the uncached value. */
+ cached_pid = CACHED_PID_UNSET;
+ return new_pid;
+ }
+
+ installed = true;
+ }
+
+ /* Publish the value; this also ends the BUSY state entered above */
+ cached_pid = new_pid;
+ return new_pid;
+ }
+
+ case CACHED_PID_BUSY: /* Somebody else is currently initializing */
+ /* Don't wait for them, simply pay for the lookup this one time */
+ return raw_getpid();
+
+ default: /* Properly initialized */
+ return current_value;
+ }
+}
+
+int must_be_root(void) {
+
+ if (geteuid() == 0)
+ return 0;
+
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be root.");
+}
+
+/* A wrapper around fork(), that does a couple of important initializations in addition to mere forking.
+ *
+ * name: if non-NULL, the child renames itself to this; also used in log messages
+ * except_fds, n_except_fds: file descriptors exempted from FORK_CLOSE_ALL_FDS
+ * flags: combination of FORK_* flags selecting the initializations to do
+ * ret_pid: if non-NULL, receives the child's PID, in the parent and in the child
+ *
+ * Returns == 0 in the child, and > 0 in the parent. Returns < 0 on failure in the parent: either the fork
+ * itself failed, or FORK_WAIT was requested and the child could not be waited for or did not exit cleanly
+ * (a non-zero exit status is reported as -EPROTO). In these cases *ret_pid is not written. Failures of the
+ * initializations in the child make the child _exit() with EXIT_FAILURE.
+ *
+ * The signal mask of the calling process is the same after the call as it was before, on success and on
+ * failure alike. */
+int safe_fork_full(
+                const char *name,
+                const int except_fds[],
+                size_t n_except_fds,
+                ForkFlags flags,
+                pid_t *ret_pid) {
+
+        pid_t original_pid, pid;
+        sigset_t saved_ss, ss;
+        bool block_signals = false;
+        int prio, r;
+
+        prio = flags & FORK_LOG ? LOG_ERR : LOG_DEBUG;
+
+        original_pid = getpid_cached();
+
+        if (flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG)) {
+                /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can
+                 * be sure that SIGTERMs are not lost we might send to the child. */
+
+                assert_se(sigfillset(&ss) >= 0);
+                block_signals = true;
+
+        } else if (flags & FORK_WAIT) {
+                /* Let's block SIGCHLD at least, so that we can safely watch for the child process */
+
+                assert_se(sigemptyset(&ss) >= 0);
+                assert_se(sigaddset(&ss, SIGCHLD) >= 0);
+                block_signals = true;
+        }
+
+        if (block_signals)
+                if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0)
+                        return log_full_errno(prio, errno, "Failed to set signal mask: %m");
+
+        if (flags & FORK_NEW_MOUNTNS)
+                pid = raw_clone(SIGCHLD|CLONE_NEWNS);
+        else
+                pid = fork();
+        if (pid < 0) {
+                r = -errno;
+
+                if (block_signals) /* undo what we did above */
+                        (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
+
+                return log_full_errno(prio, r, "Failed to fork: %m");
+        }
+        if (pid > 0) {
+                /* We are in the parent process */
+
+                log_debug("Successfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
+
+                r = 0;
+
+                if (flags & FORK_WAIT) {
+                        r = wait_for_terminate_and_check(name, pid, (flags & FORK_LOG ? WAIT_LOG : 0));
+                        if (r >= 0 && r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */
+                                r = -EPROTO;
+                }
+
+                /* Undo what we did above. Do this before looking at the result of the wait, so that a failed
+                 * wait or an unclean exit of the child doesn't leave the caller with the signals blocked. */
+                if (block_signals)
+                        (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
+
+                if (r < 0)
+                        return r;
+
+                if (ret_pid)
+                        *ret_pid = pid;
+
+                return 1;
+        }
+
+        /* We are in the child process */
+
+        if (flags & FORK_REOPEN_LOG) {
+                /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
+                log_close();
+                log_set_open_when_needed(true);
+        }
+
+        if (name) {
+                r = rename_process(name);
+                if (r < 0)
+                        log_full_errno(flags & FORK_LOG ? LOG_WARNING : LOG_DEBUG,
+                                       r, "Failed to rename process, ignoring: %m");
+        }
+
+        if (flags & FORK_DEATHSIG)
+                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) {
+                        log_full_errno(prio, errno, "Failed to set death signal: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+        if (flags & FORK_RESET_SIGNALS) {
+                r = reset_all_signal_handlers();
+                if (r < 0) {
+                        log_full_errno(prio, r, "Failed to reset signal handlers: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
+                r = reset_signal_mask();
+                if (r < 0) {
+                        log_full_errno(prio, r, "Failed to reset signal mask: %m");
+                        _exit(EXIT_FAILURE);
+                }
+        } else if (block_signals) { /* undo what we did above */
+                if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
+                        log_full_errno(prio, errno, "Failed to restore signal mask: %m");
+                        _exit(EXIT_FAILURE);
+                }
+        }
+
+        if (flags & FORK_DEATHSIG) {
+                pid_t ppid;
+                /* Let's see if the parent PID is still the one we started from? If not, then the parent
+                 * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
+
+                ppid = getppid();
+                if (ppid == 0)
+                        /* Parent is in a different PID namespace. */;
+                else if (ppid != original_pid) {
+                        log_debug("Parent died early, raising SIGTERM.");
+                        (void) raise(SIGTERM);
+                        _exit(EXIT_FAILURE);
+                }
+        }
+
+        if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) {
+
+                /* Optionally, make sure we never propagate mounts to the host. */
+
+                if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
+                        log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m");
+                        _exit(EXIT_FAILURE);
+                }
+        }
+
+        if (flags & FORK_CLOSE_ALL_FDS) {
+                /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
+                log_close();
+
+                r = close_all_fds(except_fds, n_except_fds);
+                if (r < 0) {
+                        log_full_errno(prio, r, "Failed to close all file descriptors: %m");
+                        _exit(EXIT_FAILURE);
+                }
+        }
+
+        /* When we were asked to reopen the logs, do so again now */
+        if (flags & FORK_REOPEN_LOG) {
+                log_open();
+                log_set_open_when_needed(false);
+        }
+
+        if (flags & FORK_NULL_STDIO) {
+                r = make_null_stdio();
+                if (r < 0) {
+                        log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m");
+                        _exit(EXIT_FAILURE);
+                }
+        }
+
+        if (flags & FORK_RLIMIT_NOFILE_SAFE) {
+                r = rlimit_nofile_safe();
+                if (r < 0) {
+                        log_full_errno(prio, r, "Failed to lower RLIMIT_NOFILE's soft limit to 1K: %m");
+                        _exit(EXIT_FAILURE);
+                }
+        }
+
+        if (ret_pid)
+                *ret_pid = getpid_cached();
+
+        return 0;
+}
+
+/* This is much like safe_fork(), but forks twice, and joins the specified namespaces in the middle process.
+ * This ensures that we are fully a member of the destination namespace, with pidns and all, so that
+ * /proc/self/fd works correctly.
+ *
+ * Returns 1 in the original process, with *ret_pid set to the PID of the middle process, and 0 in the inner
+ * process, with *ret_pid set to the value the inner safe_fork_full() call reported there. Returns < 0 if the
+ * first fork failed. The middle process never returns: it waits for the inner process and exits with the
+ * inner process' exit status, or with EXIT_FAILURE if anything went wrong. */
+int namespace_fork(
+                const char *outer_name,
+                const char *inner_name,
+                const int except_fds[],
+                size_t n_except_fds,
+                ForkFlags flags,
+                int pidns_fd,
+                int mntns_fd,
+                int netns_fd,
+                int userns_fd,
+                int root_fd,
+                pid_t *ret_pid) {
+
+        pid_t inner_pid;
+        int r;
+
+        r = safe_fork_full(outer_name,
+                           except_fds, n_except_fds,
+                           (flags|FORK_DEATHSIG) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE),
+                           ret_pid);
+        if (r < 0)
+                return r;
+        if (r > 0) /* Original process: we are done */
+                return 1;
+
+        /* Middle process from here on */
+
+        r = namespace_enter(pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd);
+        if (r < 0) {
+                log_full_errno(FLAGS_SET(flags, FORK_LOG) ? LOG_ERR : LOG_DEBUG, r, "Failed to join namespace: %m");
+                _exit(EXIT_FAILURE);
+        }
+
+        /* We mask a few flags here that either make no sense for the grandchild, or that we don't have to do again */
+        r = safe_fork_full(inner_name,
+                           except_fds, n_except_fds,
+                           flags & ~(FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_NULL_STDIO),
+                           &inner_pid);
+        if (r < 0)
+                _exit(EXIT_FAILURE);
+        if (r == 0) {
+                /* Inner process: hand control back to the caller */
+                if (ret_pid)
+                        *ret_pid = inner_pid;
+                return 0;
+        }
+
+        /* Still the middle process: propagate the fate of the inner process via our own exit status */
+        r = wait_for_terminate_and_check(inner_name, inner_pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0);
+        _exit(r < 0 ? EXIT_FAILURE : r);
+}
+
+/* Spawns a temporary TTY agent, making sure it goes away when we go away.
+ *
+ * Forks off a child (with signals reset, a death signal set and all file descriptors but those listed in
+ * 'except' closed) that executes 'path' with the NULL-terminated list of variadic arguments as argument
+ * vector. In the parent this returns 0 on success, with the child's PID in *ret_pid, or the negative error
+ * of safe_fork_full(). The child never returns: it either becomes the agent or exits with EXIT_FAILURE. */
+int fork_agent(const char *name, const int except[], size_t n_except, pid_t *ret_pid, const char *path, ...) {
+        bool stdout_is_tty, stderr_is_tty;
+        size_t n_args, k;
+        char **argv;
+        va_list ap;
+        int r;
+
+        assert(path);
+
+        r = safe_fork_full(name, except, n_except, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS, ret_pid);
+        if (r != 0)
+                return r < 0 ? r : 0; /* Failure, or we are the parent */
+
+        /* In the child: */
+
+        stdout_is_tty = isatty(STDOUT_FILENO);
+        stderr_is_tty = isatty(STDERR_FILENO);
+
+        if (!stdout_is_tty || !stderr_is_tty) {
+                int fd;
+
+                /* Detach from whichever of stdout/stderr is not a TTY and put /dev/tty in its place. This is
+                 * important to ensure that when we are started via popen() or a similar call that expects to
+                 * read EOF, EOF is actually generated and not delayed indefinitely because we keep an unused
+                 * copy of the descriptor around. */
+                fd = open("/dev/tty", O_WRONLY);
+                if (fd < 0) {
+                        log_error_errno(errno, "Failed to open /dev/tty: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                if (!stdout_is_tty)
+                        if (dup2(fd, STDOUT_FILENO) < 0) {
+                                log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
+                                _exit(EXIT_FAILURE);
+                        }
+
+                if (!stderr_is_tty)
+                        if (dup2(fd, STDERR_FILENO) < 0) {
+                                log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
+                                _exit(EXIT_FAILURE);
+                        }
+
+                safe_close_above_stdio(fd);
+        }
+
+        (void) rlimit_nofile_safe();
+
+        /* First pass over the arguments: count them, not including the terminating NULL */
+        n_args = 0;
+        va_start(ap, path);
+        while (va_arg(ap, char*))
+                n_args++;
+        va_end(ap);
+
+        /* Room for the arguments plus the terminating NULL */
+        argv = newa(char*, n_args + 1);
+
+        /* Second pass: copy the arguments, including the terminating NULL */
+        va_start(ap, path);
+        for (k = 0; k <= n_args; k++)
+                argv[k] = va_arg(ap, char*);
+        va_end(ap);
+
+        execv(path, argv);
+        _exit(EXIT_FAILURE);
+}
+
+/* Formats 'value' as decimal integer and writes it to /proc/self/oom_score_adj. Returns the result of
+ * write_string_file(). */
+int set_oom_score_adjust(int value) {
+        char text[DECIMAL_STR_MAX(int)];
+
+        sprintf(text, "%i", value);
+
+        return write_string_file(
+                        "/proc/self/oom_score_adj",
+                        text,
+                        WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER);
+}
+
+/* Names of the I/O scheduling classes, indexed by the IOPRIO_CLASS_* constants. */
+static const char *const ioprio_class_table[] = {
+ [IOPRIO_CLASS_NONE] = "none",
+ [IOPRIO_CLASS_RT] = "realtime",
+ [IOPRIO_CLASS_BE] = "best-effort",
+ [IOPRIO_CLASS_IDLE] = "idle"
+};
+
+/* Generates the lookup functions for the table above; process-util.h declares them as
+ * ioprio_class_to_string_alloc() and ioprio_class_from_string().
+ * NOTE(review): the macro is defined elsewhere — presumably the "fallback" is a numeric representation for
+ * values up to IOPRIO_N_CLASSES that have no name in the table; confirm against the macro definition. */
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, IOPRIO_N_CLASSES);
+
+/* Names of the CLD_* codes, i.e. of the values waitid() reports in siginfo_t.si_code for child processes. */
+static const char *const sigchld_code_table[] = {
+ [CLD_EXITED] = "exited",
+ [CLD_KILLED] = "killed",
+ [CLD_DUMPED] = "dumped",
+ [CLD_TRAPPED] = "trapped",
+ [CLD_STOPPED] = "stopped",
+ [CLD_CONTINUED] = "continued",
+};
+
+/* Generates the lookup functions for the table above; process-util.h declares them as
+ * sigchld_code_to_string() and sigchld_code_from_string(). */
+DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
+
+/* Names of the CPU scheduling policies, indexed by the SCHED_* constants. */
+static const char* const sched_policy_table[] = {
+ [SCHED_OTHER] = "other",
+ [SCHED_BATCH] = "batch",
+ [SCHED_IDLE] = "idle",
+ [SCHED_FIFO] = "fifo",
+ [SCHED_RR] = "rr"
+};
+
+/* Generates the lookup functions for the table above; process-util.h declares them as
+ * sched_policy_to_string_alloc() and sched_policy_from_string().
+ * NOTE(review): the macro is defined elsewhere — presumably the "fallback" is a numeric representation for
+ * values up to INT_MAX that have no name in the table; confirm against the macro definition. */
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);
diff --git a/src/basic/process-util.h b/src/basic/process-util.h
new file mode 100644
index 0000000..c85ea30
--- /dev/null
+++ b/src/basic/process-util.h
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <alloca.h>
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+
+#include "format-util.h"
+#include "ioprio.h"
+#include "macro.h"
+#include "time-util.h"
+
+/* Evaluates to the path of the procfs file 'field' of the process 'pid', as a const char*.
+ *
+ * For pid == 0 the result is the string literal "/proc/self/<field>". Otherwise the path is formatted as
+ * "/proc/<pid>/<field>" into a buffer obtained with newa() (NOTE(review): newa() is defined elsewhere,
+ * presumably on top of alloca() — in that case the result is only valid until the calling function returns,
+ * and the macro should not be used in loops or inside function call arguments; confirm).
+ *
+ * 'field' must be a string literal: it is pasted onto the prefix by string literal concatenation, and its
+ * size is taken with sizeof(). 'pid' is evaluated exactly once. */
+#define procfs_file_alloca(pid, field) \
+ ({ \
+ pid_t _pid_ = (pid); \
+ const char *_r_; \
+ if (_pid_ == 0) { \
+ _r_ = ("/proc/self/" field); \
+ } else { \
+ _r_ = newa(char, STRLEN("/proc/") + DECIMAL_STR_MAX(pid_t) + 1 + sizeof(field)); \
+ sprintf((char*) _r_, "/proc/"PID_FMT"/" field, _pid_); \
+ } \
+ _r_; \
+ })
+
+int get_process_state(pid_t pid);
+int get_process_comm(pid_t pid, char **name);
+int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line);
+int get_process_exe(pid_t pid, char **name);
+int get_process_uid(pid_t pid, uid_t *uid);
+int get_process_gid(pid_t pid, gid_t *gid);
+int get_process_capeff(pid_t pid, char **capeff);
+int get_process_cwd(pid_t pid, char **cwd);
+int get_process_root(pid_t pid, char **root);
+int get_process_environ(pid_t pid, char **environ);
+int get_process_ppid(pid_t pid, pid_t *ppid);
+
+int wait_for_terminate(pid_t pid, siginfo_t *status);
+
+/* Flags controlling at which level wait_for_terminate_and_check() logs what it observes. Without the
+ * respective flag the event is logged at debug level only. */
+typedef enum WaitFlags {
+ /* Log at error level if waiting fails, or the process is killed by a signal or ends for an unknown reason */
+ WAIT_LOG_ABNORMAL = 1 << 0,
+ /* Log at error level if the process exits with a non-zero exit status */
+ WAIT_LOG_NON_ZERO_EXIT_STATUS = 1 << 1,
+
+ /* A shortcut for requesting the most complete logging */
+ WAIT_LOG = WAIT_LOG_ABNORMAL|WAIT_LOG_NON_ZERO_EXIT_STATUS,
+} WaitFlags;
+
+int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags);
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout);
+
+void sigkill_wait(pid_t pid);
+void sigkill_waitp(pid_t *pid);
+void sigterm_wait(pid_t pid);
+
+int kill_and_sigcont(pid_t pid, int sig);
+
+int rename_process(const char name[]);
+int is_kernel_thread(pid_t pid);
+
+int getenv_for_pid(pid_t pid, const char *field, char **_value);
+
+bool pid_is_alive(pid_t pid);
+bool pid_is_unwaited(pid_t pid);
+int pid_from_same_root_fs(pid_t pid);
+
+bool is_main_thread(void);
+
+_noreturn_ void freeze(void);
+
+bool oom_score_adjust_is_valid(int oa);
+
+#ifndef PERSONALITY_INVALID
+/* personality(7) documents that 0xffffffffUL is used for querying the
+ * current personality, hence let's use that here as error
+ * indicator. */
+#define PERSONALITY_INVALID 0xffffffffLU
+#endif
+
+unsigned long personality_from_string(const char *p);
+const char *personality_to_string(unsigned long);
+
+int safe_personality(unsigned long p);
+int opinionated_personality(unsigned long *ret);
+
+int ioprio_class_to_string_alloc(int i, char **s);
+int ioprio_class_from_string(const char *s);
+
+const char *sigchld_code_to_string(int i) _const_;
+int sigchld_code_from_string(const char *s) _pure_;
+
+int sched_policy_to_string_alloc(int i, char **s);
+int sched_policy_from_string(const char *s);
+
+/* Recovers a PID that was packed into a pointer value with PID_TO_PTR(). */
+static inline pid_t PTR_TO_PID(const void *p) {
+        uintptr_t raw = (uintptr_t) p;
+
+        return (pid_t) raw;
+}
+
+/* Packs a PID into a pointer value, e.g. for storing it in pointer-based containers. The result must not be
+ * dereferenced; convert it back with PTR_TO_PID(). */
+static inline void* PID_TO_PTR(pid_t pid) {
+        uintptr_t raw = (uintptr_t) pid;
+
+        return (void*) raw;
+}
+
+void valgrind_summary_hack(void);
+
+int pid_compare_func(const pid_t *a, const pid_t *b);
+
+/* Returns true if 'n' is a valid nice level, i.e. PRIO_MIN <= n < PRIO_MAX. */
+static inline bool nice_is_valid(int n) {
+        if (n < PRIO_MIN)
+                return false;
+
+        return n < PRIO_MAX;
+}
+
+/* Returns true if 'i' is one of the CPU scheduling policies we know. */
+static inline bool sched_policy_is_valid(int i) {
+        switch (i) {
+
+        case SCHED_OTHER:
+        case SCHED_BATCH:
+        case SCHED_IDLE:
+        case SCHED_FIFO:
+        case SCHED_RR:
+                return true;
+
+        default:
+                return false;
+        }
+}
+
+/* Returns true if 'i' lies between 0 and the maximum priority of the SCHED_RR policy, both inclusive. */
+static inline bool sched_priority_is_valid(int i) {
+        if (i < 0)
+                return false;
+
+        return i <= sched_get_priority_max(SCHED_RR);
+}
+
+/* Returns true if 'i' is one of the I/O scheduling classes we know. */
+static inline bool ioprio_class_is_valid(int i) {
+        switch (i) {
+
+        case IOPRIO_CLASS_NONE:
+        case IOPRIO_CLASS_RT:
+        case IOPRIO_CLASS_BE:
+        case IOPRIO_CLASS_IDLE:
+                return true;
+
+        default:
+                return false;
+        }
+}
+
+/* Returns true if 'i' is a valid I/O priority level, i.e. 0 <= i < IOPRIO_BE_NR. */
+static inline bool ioprio_priority_is_valid(int i) {
+        if (i < 0)
+                return false;
+
+        return i < IOPRIO_BE_NR;
+}
+
+/* Returns true if 'p' can refer to an actual process, i.e. is positive. */
+static inline bool pid_is_valid(pid_t p) {
+        if (p <= 0)
+                return false;
+
+        return true;
+}
+
+int ioprio_parse_priority(const char *s, int *ret);
+
+pid_t getpid_cached(void);
+void reset_cached_pid(void);
+
+int must_be_root(void);
+
+/* Flags selecting the extra work safe_fork_full() and its relatives do around the actual fork. Unless noted
+ * otherwise the flags take effect in the child. */
+typedef enum ForkFlags {
+ FORK_RESET_SIGNALS = 1 << 0, /* Reset all signal handlers and signal mask */
+ FORK_CLOSE_ALL_FDS = 1 << 1, /* Close all open file descriptors in the child, except for 0,1,2 and those passed in except_fds */
+ FORK_DEATHSIG = 1 << 2, /* Set PR_SET_PDEATHSIG to SIGTERM in the child */
+ FORK_NULL_STDIO = 1 << 3, /* Connect 0,1,2 to /dev/null */
+ FORK_REOPEN_LOG = 1 << 4, /* Reopen log connection */
+ FORK_LOG = 1 << 5, /* Log above LOG_DEBUG log level about failures (applies to parent and child) */
+ FORK_WAIT = 1 << 6, /* Wait until child exited (done in the parent; unclean exit is reported as failure) */
+ FORK_NEW_MOUNTNS = 1 << 7, /* Run child in its own mount namespace */
+ FORK_MOUNTNS_SLAVE = 1 << 8, /* Make child's mount namespace MS_SLAVE (only in combination with FORK_NEW_MOUNTNS) */
+ FORK_RLIMIT_NOFILE_SAFE = 1 << 9, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */
+} ForkFlags;
+
+int safe_fork_full(const char *name, const int except_fds[], size_t n_except_fds, ForkFlags flags, pid_t *ret_pid);
+
+/* Convenience wrapper around safe_fork_full() for callers that have no file descriptors to exempt from
+ * FORK_CLOSE_ALL_FDS. Parameters and return value are those of safe_fork_full(). */
+static inline int safe_fork(const char *name, ForkFlags flags, pid_t *ret_pid) {
+        const int *no_except_fds = NULL;
+
+        return safe_fork_full(name, no_except_fds, 0, flags, ret_pid);
+}
+
+int namespace_fork(const char *outer_name, const char *inner_name, const int except_fds[], size_t n_except_fds, ForkFlags flags, int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd, pid_t *ret_pid);
+
+int fork_agent(const char *name, const int except[], size_t n_except, pid_t *pid, const char *path, ...) _sentinel_;
+
+int set_oom_score_adjust(int value);
+
+#if SIZEOF_PID_T == 4
+/* The highest possible (theoretical) pid_t value on this architecture. */
+#define PID_T_MAX ((pid_t) INT32_MAX)
+/* The maximum number of concurrent processes Linux allows on this architecture, as well as the highest valid PID value
+ * the kernel will potentially assign. This reflects a value compiled into the kernel (PID_MAX_LIMIT), and sets the
+ * upper boundary on what may be written to the /proc/sys/kernel/pid_max sysctl (but do note that the sysctl is off by
+ * 1, since PID 0 can never exist and there can hence only be one process less than the limit would suggest). Since
+ * these values are documented in proc(5) we feel quite confident that they are stable enough for the near future at
+ * least to define them here too. */
+#define TASKS_MAX 4194303U
+#elif SIZEOF_PID_T == 2
+#define PID_T_MAX ((pid_t) INT16_MAX)
+#define TASKS_MAX 32767U
+#else
+#error "Unknown pid_t size"
+#endif
+
+assert_cc(TASKS_MAX <= (unsigned long) PID_T_MAX)
+
+/* Like TAKE_PTR() but for child PIDs, resetting them to 0: evaluates to the current value of 'pid' and sets
+ * 'pid' to 0 as a side effect. 'pid' must be an lvalue; note that it is evaluated twice, hence it should be
+ * free of side effects. */
+#define TAKE_PID(pid) \
+ ({ \
+ pid_t _pid_ = (pid); \
+ (pid) = 0; \
+ _pid_; \
+ })
diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c
new file mode 100644
index 0000000..7aaf95b
--- /dev/null
+++ b/src/basic/procfs-util.c
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "procfs-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+/* Determines the effective system-wide task limit and stores it in *ret.
+ *
+ * Two sysctls bound the number of tasks:
+ *
+ *   - kernel.threads-max directly caps the number of concurrent tasks.
+ *
+ *   - kernel.pid_max caps the numeric range of PIDs and thereby indirectly the number of tasks. PIDs stay
+ *     below this value and PID 0 is never a valid process ID, hence it allows one task fewer than its
+ *     value suggests.
+ *
+ * The result is the smaller of (pid_max - 1) and threads-max. Returns 0 on success, or the negative error
+ * from reading or parsing either file. */
+int procfs_tasks_get_limit(uint64_t *ret) {
+        _cleanup_free_ char *pid_max_text = NULL, *threads_max_text = NULL;
+        uint64_t by_pid_range, by_threads;
+        int r;
+
+        assert(ret);
+
+        r = read_one_line_file("/proc/sys/kernel/pid_max", &pid_max_text);
+        if (r < 0)
+                return r;
+        r = safe_atou64(pid_max_text, &by_pid_range);
+        if (r < 0)
+                return r;
+
+        r = read_one_line_file("/proc/sys/kernel/threads-max", &threads_max_text);
+        if (r < 0)
+                return r;
+        r = safe_atou64(threads_max_text, &by_threads);
+        if (r < 0)
+                return r;
+
+        /* PID 0 is not a valid PID, hence the usable range is one smaller than pid_max */
+        by_pid_range--;
+
+        *ret = by_pid_range < by_threads ? by_pid_range : by_threads;
+        return 0;
+}
+
+/* Sets the system-wide task limit by writing kernel.threads-max, raising kernel.pid_max first when the PID
+ * range would otherwise be the tighter bound.
+ *
+ * limit: requested number of tasks. 0 is refused with -EINVAL; everything else is clamped to
+ *        [20, TASKS_MAX].
+ *
+ * Returns 0 on success. If writing threads-max fails but the effective limit, MIN(pid_max - 1, threads-max),
+ * already equals the requested value, that is reported as success too. Otherwise returns the negative error
+ * of the failing read, parse or write. */
+int procfs_tasks_set_limit(uint64_t limit) {
+        char buffer[DECIMAL_STR_MAX(uint64_t)+1];
+        _cleanup_free_ char *value = NULL;
+        uint64_t pid_max;
+        int r;
+
+        if (limit == 0) /* This makes no sense, we are userspace and hence count as tasks too, and we want to live,
+                         * hence the limit conceptually has to be above 0. Also, most likely if anyone asks for a zero
+                         * limit he/she probably means "no limit", hence let's better refuse this to avoid
+                         * confusion. */
+                return -EINVAL;
+
+        /* The Linux kernel doesn't allow this value to go below 20, hence don't allow this either, higher values than
+         * TASKS_MAX are not accepted by the pid_max sysctl. We'll treat anything this high as "unbounded" and hence
+         * set it to the maximum. */
+        limit = CLAMP(limit, 20U, TASKS_MAX);
+
+        r = read_one_line_file("/proc/sys/kernel/pid_max", &value);
+        if (r < 0)
+                return r;
+        r = safe_atou64(value, &pid_max);
+        if (r < 0)
+                return r;
+
+        /* As pid_max is about the numeric pid_t range we'll bump it if necessary, but only ever increase it, never
+         * decrease it, as threads-max is the much more relevant sysctl. */
+        if (limit > pid_max-1) {
+                snprintf(buffer, sizeof(buffer), "%" PRIu64, limit+1); /* Add one, since PID 0 is not a valid PID */
+                r = write_string_file("/proc/sys/kernel/pid_max", buffer, WRITE_STRING_FILE_DISABLE_BUFFER);
+                if (r < 0)
+                        return r;
+
+                /* Remember what we just wrote, so that the comparison in the fallback path below is done
+                 * against the value now in effect rather than the one read before the bump. */
+                pid_max = limit + 1;
+        }
+
+        snprintf(buffer, sizeof(buffer), "%" PRIu64, limit);
+        r = write_string_file("/proc/sys/kernel/threads-max", buffer, WRITE_STRING_FILE_DISABLE_BUFFER);
+        if (r < 0) {
+                uint64_t threads_max;
+
+                /* Hmm, we couldn't write this? If so, maybe it was already set properly? In that case let's not
+                 * generate an error */
+
+                value = mfree(value);
+                if (read_one_line_file("/proc/sys/kernel/threads-max", &value) < 0)
+                        return r; /* return original error */
+
+                if (safe_atou64(value, &threads_max) < 0)
+                        return r; /* return original error */
+
+                if (MIN(pid_max-1, threads_max) != limit)
+                        return r; /* return original error */
+
+                /* Yay! Value set already matches what we were trying to set, hence consider this a success. */
+        }
+
+        return 0;
+}
+
+/* Reads the number that follows the first '/' in /proc/loadavg and stores it in *ret. This is the second
+ * half of the fourth field; no earlier field contains a slash, so the first slash identifies it.
+ *
+ * Returns 0 on success, -EINVAL if the line has no slash, or the negative error from reading the file or
+ * parsing the number. */
+int procfs_tasks_get_current(uint64_t *ret) {
+        _cleanup_free_ char *loadavg = NULL;
+        const char *after_slash, *digits;
+        int r;
+
+        assert(ret);
+
+        r = read_one_line_file("/proc/loadavg", &loadavg);
+        if (r < 0)
+                return r;
+
+        after_slash = strchr(loadavg, '/');
+        if (!after_slash)
+                return -EINVAL;
+        after_slash++;
+
+        /* Copy just the run of digits onto the stack so that it can be parsed as a NUL-terminated string */
+        digits = strndupa(after_slash, strspn(after_slash, DIGITS));
+
+        return safe_atou64(digits, ret);
+}
+
+/* Greatest common divisor of a and b, computed with the iterative Euclidean algorithm. If b is 0 the result
+ * is a (so calc_gcd64(0, 0) is 0). */
+static uint64_t calc_gcd64(uint64_t a, uint64_t b) {
+        uint64_t rem;
+
+        for (; b != 0; a = b, b = rem)
+                rem = a % b;
+
+        return a;
+}
+
+/* Reads the aggregate "cpu" line of /proc/stat and stores the consumed CPU time, converted from clock ticks
+ * to nanoseconds (rounded up), in *ret.
+ *
+ * Summed are the columns scanned below: user, nice, system, irq, softirq and, if present, guest and
+ * guest_nice. The two columns after system and the one after softirq are skipped.
+ *
+ * Returns 0 on success, -EINVAL if the line does not start with "cpu" or fewer than five values could be
+ * scanned, -errno if sysconf() fails, or the negative error from reading the file. */
+int procfs_cpu_get_usage(nsec_t *ret) {
+        _cleanup_free_ char *stat_line = NULL;
+        unsigned long user_ticks, nice_ticks, system_ticks, irq_ticks, softirq_ticks,
+                guest_ticks = 0, guest_nice_ticks = 0;
+        uint64_t total, divisor, numerator, denominator;
+        const char *fields;
+        long hz;
+        int r, matched;
+
+        assert(ret);
+
+        r = read_one_line_file("/proc/stat", &stat_line);
+        if (r < 0)
+                return r;
+
+        fields = first_word(stat_line, "cpu");
+        if (!fields)
+                return -EINVAL;
+
+        matched = sscanf(fields, "%lu %lu %lu %*u %*u %lu %lu %*u %lu %lu",
+                         &user_ticks,
+                         &nice_ticks,
+                         &system_ticks,
+                         &irq_ticks,
+                         &softirq_ticks,
+                         &guest_ticks,
+                         &guest_nice_ticks);
+        if (matched < 5) /* only the first five values are mandatory, the guest ones default to 0 */
+                return -EINVAL;
+
+        hz = sysconf(_SC_CLK_TCK);
+        if (hz < 0)
+                return -errno;
+        assert(hz > 0);
+
+        total = (uint64_t) user_ticks;
+        total += (uint64_t) nice_ticks;
+        total += (uint64_t) system_ticks;
+        total += (uint64_t) irq_ticks;
+        total += (uint64_t) softirq_ticks;
+        total += (uint64_t) guest_ticks;
+        total += (uint64_t) guest_nice_ticks;
+
+        /* Reduce the NSEC_PER_SEC/hz fraction first, to keep the multiplication below from overflowing when
+         * converting ticks to nsec */
+        divisor = calc_gcd64(NSEC_PER_SEC, hz);
+        numerator = (uint64_t) NSEC_PER_SEC / divisor;
+        denominator = (uint64_t) hz / divisor;
+
+        *ret = DIV_ROUND_UP((nsec_t) total * (nsec_t) numerator, (nsec_t) denominator);
+        return 0;
+}
+
+/* Parses the "MemTotal:" and "MemFree:" lines of /proc/meminfo.
+ *
+ * ret_total: if non-NULL, receives MemTotal in bytes.
+ * ret_used:  if non-NULL, receives (MemTotal - MemFree) in bytes.
+ *
+ * Both lines must consist of a decimal number, at least one whitespace character and the unit "kB".
+ * Returns 0 on success, -errno if the file cannot be opened, -EINVAL on a malformed line, when EOF is hit
+ * before both fields were seen, or when MemFree exceeds MemTotal, and otherwise the negative error from
+ * reading or parsing. */
+int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used) {
+        uint64_t mem_total = UINT64_MAX, mem_free = UINT64_MAX;
+        _cleanup_fclose_ FILE *f = NULL;
+        int r;
+
+        f = fopen("/proc/meminfo", "re");
+        if (!f)
+                return -errno;
+
+        /* UINT64_MAX marks "not seen yet"; keep reading until both fields have been filled in */
+        while (mem_total == UINT64_MAX || mem_free == UINT64_MAX) {
+                _cleanup_free_ char *line = NULL;
+                char *number, *unit;
+                size_t n_digits, n_blanks;
+                uint64_t *target;
+
+                r = read_line(f, LONG_LINE_MAX, &line);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        return -EINVAL; /* EOF: Couldn't find one or both fields? */
+
+                if ((number = first_word(line, "MemTotal:")))
+                        target = &mem_total;
+                else if ((number = first_word(line, "MemFree:")))
+                        target = &mem_free;
+                else
+                        continue;
+
+                /* Determine length of numeric value */
+                n_digits = strspn(number, DIGITS);
+                if (n_digits == 0)
+                        return -EINVAL;
+                unit = number + n_digits;
+
+                /* Ensure the line ends in " kB" */
+                n_blanks = strspn(unit, WHITESPACE);
+                if (n_blanks == 0 || !streq(unit + n_blanks, "kB"))
+                        return -EINVAL;
+
+                /* Cut off the unit, so that only the digits remain to be parsed */
+                *unit = 0;
+                r = safe_atou64(number, target);
+                if (r < 0)
+                        return r;
+                if (*target == UINT64_MAX) /* collides with our "not seen yet" marker */
+                        return -EINVAL;
+        }
+
+        if (mem_free > mem_total)
+                return -EINVAL;
+
+        if (ret_total)
+                *ret_total = mem_total * 1024U;
+        if (ret_used)
+                *ret_used = (mem_total - mem_free) * 1024U;
+        return 0;
+}
diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h
new file mode 100644
index 0000000..5a44e9e
--- /dev/null
+++ b/src/basic/procfs-util.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+
+#include "time-util.h"
+
+int procfs_tasks_get_limit(uint64_t *ret);
+int procfs_tasks_set_limit(uint64_t limit);
+int procfs_tasks_get_current(uint64_t *ret);
+
+int procfs_cpu_get_usage(nsec_t *ret);
+
+int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used);
+/* Shorthand for procfs_memory_get() that only asks for the "used" figure (passes NULL for ret_total). */
+static inline int procfs_memory_get_used(uint64_t *ret) {
+        int r;
+
+        r = procfs_memory_get(NULL, ret);
+        return r;
+}
diff --git a/src/basic/random-util.c b/src/basic/random-util.c
new file mode 100644
index 0000000..f7decf6
--- /dev/null
+++ b/src/basic/random-util.c
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+#include <cpuid.h>
+#endif
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+
+#if HAVE_SYS_AUXV_H
+# include <sys/auxv.h>
+#endif
+
+#if USE_SYS_RANDOM_H
+# include <sys/random.h>
+#else
+# include <linux/random.h>
+#endif
+
+#include "fd-util.h"
+#include "io-util.h"
+#include "missing.h"
+#include "random-util.h"
+#include "time-util.h"
+
+#if HAS_FEATURE_MEMORY_SANITIZER
+#include <sanitizer/msan_interface.h>
+#endif
+
+/* Fetches one unsigned long worth of data from the CPU's RDRAND instruction and stores it in *ret.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when not built for i386/x86-64 or when CPUID does not advertise RDRAND,
+ * and -EAGAIN when the instruction ran but the flag captured after it was zero. The CPUID result is cached
+ * in a function-local static, so the feature check runs only until it has produced an answer. */
+int rdrand(unsigned long *ret) {
+
+#if defined(__i386__) || defined(__x86_64__)
+ static int have_rdrand = -1;
+ unsigned char err;
+
+ if (have_rdrand < 0) {
+ uint32_t eax, ebx, ecx, edx;
+
+ /* Check if RDRAND is supported by the CPU */
+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0) {
+ have_rdrand = false;
+ return -EOPNOTSUPP;
+ }
+
+ /* Bit 30 of ECX is the one tested here as the RDRAND feature flag */
+ have_rdrand = !!(ecx & (1U << 30));
+ }
+
+ if (have_rdrand == 0)
+ return -EOPNOTSUPP;
+
+ /* "setc" copies the carry flag into 'err' right after RDRAND. Despite the variable's name, a zero value
+ * is what is treated as failure below. */
+ asm volatile("rdrand %0;"
+ "setc %1"
+ : "=r" (*ret),
+ "=qm" (err));
+
+#if HAS_FEATURE_MEMORY_SANITIZER
+ /* 'err' is written by inline assembly only; presumably MSan cannot see that store, hence mark it
+ * initialized explicitly */
+ __msan_unpoison(&err, sizeof(err));
+#endif
+
+ if (!err)
+ return -EAGAIN;
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+/* Fills the n bytes at p with random data, trying in this order: RDRAND (only with RANDOM_ALLOW_RDRAND),
+ * the getrandom() syscall, and finally /dev/urandom.
+ *
+ * Returns 0 on success (also for n == 0). Errors visible in this function: -ENODATA if getrandom() reports
+ * EAGAIN and RANDOM_DONT_DRAIN is set, -EIO if getrandom() returns 0, -ENOSYS if /dev/urandom does not
+ * exist, another -errno from getrandom() or open(), or whatever loop_read_exact() returns. Whether
+ * getrandom() exists is cached in a function-local static. */
+int genuine_random_bytes(void *p, size_t n, RandomFlags flags) {
+ static int have_syscall = -1;
+ _cleanup_close_ int fd = -1;
+ bool got_some = false;
+ int r;
+
+ /* Gathers some randomness from the kernel (or the CPU if the RANDOM_ALLOW_RDRAND flag is set). This call won't
+ * block, unless the RANDOM_BLOCK flag is set. If RANDOM_DONT_DRAIN is set, an error is returned if the random
+ * pool is not initialized. Otherwise it will always return some data from the kernel, regardless of whether
+ * the random pool is fully initialized or not. */
+
+ if (n == 0)
+ return 0;
+
+ if (FLAGS_SET(flags, RANDOM_ALLOW_RDRAND))
+ /* Try x86-64's RDRAND intrinsic if we have it. We only use it if high quality randomness is not
+ * required, as we don't trust it (who does?). Note that we only do a single iteration of RDRAND here,
+ * even though the Intel docs suggest calling this in a tight loop of 10 invocations or so. That's
+ * because we don't really care about the quality here. We generally prefer using RDRAND if the caller
+ * allows us to, since this way we won't drain the kernel randomness pool if we don't need it, as the
+ * pool's entropy is scarce. */
+ for (;;) {
+ unsigned long u;
+ size_t m;
+
+ if (rdrand(&u) < 0) {
+ if (got_some && FLAGS_SET(flags, RANDOM_EXTEND_WITH_PSEUDO)) {
+ /* Fill in the remaining bytes using pseudo-random values */
+ pseudo_random_bytes(p, n);
+ return 0;
+ }
+
+ /* OK, this didn't work, let's go to getrandom() + /dev/urandom instead */
+ break;
+ }
+
+ /* Copy at most one word per iteration, and advance p/n past what has been filled */
+ m = MIN(sizeof(u), n);
+ memcpy(p, &u, m);
+
+ p = (uint8_t*) p + m;
+ n -= m;
+
+ if (n == 0)
+ return 0; /* Yay, success! */
+
+ got_some = true;
+ }
+
+ /* Use the getrandom() syscall unless we know we don't have it. */
+ if (have_syscall != 0 && !HAS_FEATURE_MEMORY_SANITIZER) {
+
+ for (;;) {
+ r = getrandom(p, n, FLAGS_SET(flags, RANDOM_BLOCK) ? 0 : GRND_NONBLOCK);
+ if (r > 0) {
+ have_syscall = true;
+
+ if ((size_t) r == n)
+ return 0; /* Yay, success! */
+
+ /* Short read: skip over the part that was filled */
+ assert((size_t) r < n);
+ p = (uint8_t*) p + r;
+ n -= r;
+
+ if (FLAGS_SET(flags, RANDOM_EXTEND_WITH_PSEUDO)) {
+ /* Fill in the remaining bytes using pseudo-random values */
+ pseudo_random_bytes(p, n);
+ return 0;
+ }
+
+ got_some = true;
+
+ /* Hmm, we didn't get enough good data but the caller insists on good data? Then try again */
+ if (FLAGS_SET(flags, RANDOM_BLOCK))
+ continue;
+
+ /* Fill in the rest with /dev/urandom */
+ break;
+
+ } else if (r == 0) {
+ have_syscall = true;
+ return -EIO;
+
+ } else if (errno == ENOSYS) {
+ /* We lack the syscall, continue with reading from /dev/urandom. */
+ have_syscall = false;
+ break;
+
+ } else if (errno == EAGAIN) {
+ /* The kernel has no entropy whatsoever. Let's remember to use the syscall the next
+ * time again though.
+ *
+ * If RANDOM_DONT_DRAIN is set, return an error so that random_bytes() can produce some
+ * pseudo-random bytes instead. Otherwise, fall back to /dev/urandom, which we know is empty,
+ * but the kernel will produce some bytes for us on a best-effort basis. */
+ have_syscall = true;
+
+ if (got_some && FLAGS_SET(flags, RANDOM_EXTEND_WITH_PSEUDO)) {
+ /* Fill in the remaining bytes using pseudorandom values */
+ pseudo_random_bytes(p, n);
+ return 0;
+ }
+
+ if (FLAGS_SET(flags, RANDOM_DONT_DRAIN))
+ return -ENODATA;
+
+ /* Use /dev/urandom instead */
+ break;
+ } else
+ return -errno;
+ }
+ }
+
+ /* Last resort: read whatever is still missing from /dev/urandom. A missing device node is reported as
+ * -ENOSYS rather than -ENOENT. */
+ fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return errno == ENOENT ? -ENOSYS : -errno;
+
+ return loop_read_exact(fd, p, n, true);
+}
+
+/* Seeds the libc PRNG via srand(), once per process. Later calls return immediately.
+ *
+ * The seed is the XOR of: the first sizeof(unsigned) bytes of the AT_RANDOM auxv entry (if sys/auxv.h is
+ * available and the entry exists, 0 otherwise), the current CLOCK_REALTIME timestamp, the calling thread's
+ * ID, and one RDRAND value if rdrand() succeeds. */
+void initialize_srand(void) {
+        static bool srand_called = false;
+        unsigned seed = 0;
+        unsigned long hw;
+
+        if (srand_called)
+                return;
+
+#if HAVE_SYS_AUXV_H
+        {
+                const void *at_random;
+
+                /* The kernel provides us with 16 bytes of entropy in auxv, so let's try to make use of
+                 * that to seed the pseudo-random generator. It's better than nothing... */
+                at_random = (const void*) getauxval(AT_RANDOM);
+                if (at_random) {
+                        assert_cc(sizeof(seed) <= 16);
+                        memcpy(&seed, at_random, sizeof(seed));
+                }
+        }
+#endif
+
+        seed ^= (unsigned) now(CLOCK_REALTIME);
+        seed ^= (unsigned) gettid();
+
+        if (rdrand(&hw) >= 0)
+                seed ^= (unsigned) hw;
+
+        srand(seed);
+        srand_called = true;
+}
+
+/* RAND_STEP is the number of output bytes pseudo_random_bytes() derives from a single rand() call.
+ *
+ * INT_MAX gives us only 31 bits, so use 24 out of that. */
+#if RAND_MAX >= INT_MAX
+# define RAND_STEP 3
+#else
+/* SHORT_INT_MAX or lower gives at most 15 bits, we just use 8 out of that. */
+# define RAND_STEP 1
+#endif
+
+/* Fills the n bytes at p with output of the libc rand() generator, seeding it first if that has not
+ * happened yet. Each rand() call supplies up to RAND_STEP bytes, taken from the low-order bits of its
+ * result. Never fails. */
+void pseudo_random_bytes(void *p, size_t n) {
+        uint8_t *out = p;
+        size_t i;
+
+        initialize_srand();
+
+        for (i = 0; i < n; i += RAND_STEP) {
+                unsigned v;
+
+                v = (unsigned) rand();
+
+                out[i] = v;
+#if RAND_STEP >= 2
+                if (i + 1 < n)
+                        out[i + 1] = v >> 8;
+#endif
+#if RAND_STEP >= 3
+                if (i + 2 < n)
+                        out[i + 2] = v >> 16;
+#endif
+        }
+}
+
+/* Fills the n bytes at p with random data and never fails. genuine_random_bytes() is asked first, with
+ * flags that allow RDRAND, allow topping up with pseudo-random data and forbid draining the kernel pool.
+ * If it reports any error the whole buffer is filled by pseudo_random_bytes() instead. */
+void random_bytes(void *p, size_t n) {
+        int r;
+
+        r = genuine_random_bytes(p, n, RANDOM_EXTEND_WITH_PSEUDO|RANDOM_DONT_DRAIN|RANDOM_ALLOW_RDRAND);
+        if (r < 0)
+                /* If for some reason some user made /dev/urandom unavailable to us, or the kernel has no
+                 * entropy, use a PRNG instead. */
+                pseudo_random_bytes(p, n);
+}
diff --git a/src/basic/random-util.h b/src/basic/random-util.h
new file mode 100644
index 0000000..3e8c288
--- /dev/null
+++ b/src/basic/random-util.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* Bit flags accepted by genuine_random_bytes(); may be OR-ed together. */
+typedef enum RandomFlags {
+ RANDOM_EXTEND_WITH_PSEUDO = 1 << 0, /* If we can't get enough genuine randomness, but some, fill up the rest with pseudo-randomness */
+ RANDOM_BLOCK = 1 << 1, /* Rather block than return crap randomness (only if the kernel supports that) */
+ RANDOM_DONT_DRAIN = 1 << 2, /* If we can't get any randomness at all, return early with -ENODATA */
+ RANDOM_ALLOW_RDRAND = 1 << 3, /* Allow usage of the CPU RNG */
+} RandomFlags;
+
+int genuine_random_bytes(void *p, size_t n, RandomFlags flags); /* returns "genuine" randomness, optionally filled up with pseudo-randomness, if not enough is available */
+void pseudo_random_bytes(void *p, size_t n); /* returns only pseudo-randomness (but possibly seeded from something better) */
+void random_bytes(void *p, size_t n); /* returns genuine randomness if cheaply available, and pseudo-randomness if not. */
+
+void initialize_srand(void);
+
+/* Returns a 64-bit value whose bytes were all filled by random_bytes(). */
+static inline uint64_t random_u64(void) {
+        uint64_t value;
+
+        random_bytes(&value, sizeof(value));
+
+        return value;
+}
+
+/* Returns a 32-bit value whose bytes were all filled by random_bytes(). */
+static inline uint32_t random_u32(void) {
+        uint32_t value;
+
+        random_bytes(&value, sizeof(value));
+
+        return value;
+}
+
+int rdrand(unsigned long *ret);
diff --git a/src/basic/ratelimit.c b/src/basic/ratelimit.c
new file mode 100644
index 0000000..4e04e04
--- /dev/null
+++ b/src/basic/ratelimit.c
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/time.h>
+
+#include "macro.h"
+#include "ratelimit.h"
+
+/* Modelled after Linux' lib/ratelimit.c by Dave Young
+ * <hidave.darkstar@gmail.com>, which is licensed GPLv2. */
+
+/* Accounts for one event against the rate limit 'r' and tells the caller whether it is still within
+ * budget.
+ *
+ * Returns true if the event is permitted: the limit is disabled (interval or burst is 0), a new time window
+ * was started, or fewer than 'burst' events were counted in the current window. Returns false once the
+ * budget of the current window is used up. Permitted events are counted in r->num. */
+bool ratelimit_below(RateLimit *r) {
+        usec_t ts;
+
+        assert(r);
+
+        if (r->interval <= 0 || r->burst <= 0)
+                return true;
+
+        ts = now(CLOCK_MONOTONIC);
+
+        /* Compare the elapsed time against the interval instead of computing begin + interval: the sum
+         * wraps around for very large intervals, which would make every call look like a new window and
+         * thus disable the limit entirely. */
+        if (r->begin <= 0 ||
+            (ts > r->begin && ts - r->begin > r->interval)) {
+                r->begin = ts;
+
+                /* Start a new window; this event is its first one */
+                r->num = 1;
+                return true;
+        }
+
+        if (r->num >= r->burst)
+                return false;
+
+        r->num++;
+        return true;
+}
diff --git a/src/basic/ratelimit.h b/src/basic/ratelimit.h
new file mode 100644
index 0000000..de91def
--- /dev/null
+++ b/src/basic/ratelimit.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+#include "util.h"
+
+/* State of one rate limiter, as evaluated by ratelimit_below(). */
+typedef struct RateLimit {
+ usec_t interval; /* length of one time window; 0 disables limiting */
+ usec_t begin; /* CLOCK_MONOTONIC timestamp at which the current window started; 0 if none yet */
+ unsigned burst; /* number of events permitted per window; 0 disables limiting */
+ unsigned num; /* events counted in the current window */
+} RateLimit;
+
+/* Defines a RateLimit variable called _name with the given interval and burst, no window started and no
+ * events counted. Storage class specifiers such as "static" may precede the macro invocation. */
+#define RATELIMIT_DEFINE(_name, _interval, _burst) \
+ RateLimit _name = { \
+ .interval = (_interval), \
+ .burst = (_burst), \
+ .num = 0, \
+ .begin = 0 \
+ }
+
+/* (Re-)initializes the existing RateLimit object v (an lvalue, not a pointer): sets interval and burst, and
+ * clears the event counter and window start. */
+#define RATELIMIT_INIT(v, _interval, _burst) \
+ do { \
+ RateLimit *_r = &(v); \
+ _r->interval = (_interval); \
+ _r->burst = (_burst); \
+ _r->num = 0; \
+ _r->begin = 0; \
+ } while (false)
+
+/* Clears the event counter and window start of the RateLimit object v (an lvalue, not a pointer), leaving
+ * interval and burst untouched. */
+#define RATELIMIT_RESET(v) \
+ do { \
+ RateLimit *_r = &(v); \
+ _r->num = 0; \
+ _r->begin = 0; \
+ } while (false)
+
+bool ratelimit_below(RateLimit *r);
diff --git a/src/basic/raw-clone.h b/src/basic/raw-clone.h
new file mode 100644
index 0000000..b8857b0
--- /dev/null
+++ b/src/basic/raw-clone.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2016 Michael Karcher
+***/
+
+#include <errno.h>
+#include <sched.h>
+#include <sys/syscall.h>
+
+#include "log.h"
+#include "macro.h"
+
+/**
+ * raw_clone() - uses clone to create a new process with clone flags
+ * @flags: Flags to pass to the clone system call
+ *
+ * Uses the clone system call to create a new process with the cloning flags and termination signal passed in the flags
+ * parameter. Opposed to glibc's clone function, using this function does not set up a separate stack for the child, but
+ * relies on copy-on-write semantics on the one stack at a common virtual address, just as fork does.
+ *
+ * To obtain copy-on-write semantics, flags must not contain CLONE_VM, and thus CLONE_THREAD and CLONE_SIGHAND (which
+ * require CLONE_VM) are not usable.
+ *
+ * Additionally, as this function does not pass the ptid, newtls and ctid parameters to the kernel, flags must not
+ * contain CLONE_PARENT_SETTID, CLONE_CHILD_SETTID, CLONE_CHILD_CLEARTID or CLONE_SETTLS.
+ *
+ * Returns: 0 in the child process and the child process id in the parent. On failure -1 is returned, as
+ * delivered by syscall() or, on sparc, set explicitly with errno filled in from the kernel's error value.
+ */
+static inline pid_t raw_clone(unsigned long flags) {
+ pid_t ret;
+
+ /* Refuse the flags listed above, which this wrapper cannot support */
+ assert((flags & (CLONE_VM|CLONE_PARENT_SETTID|CLONE_CHILD_SETTID|
+ CLONE_CHILD_CLEARTID|CLONE_SETTLS)) == 0);
+#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
+ /* On s390/s390x and cris the order of the first and second arguments
+ * of the raw clone() system call is reversed. */
+ ret = (pid_t) syscall(__NR_clone, NULL, flags);
+#elif defined(__sparc__)
+ {
+ /**
+ * sparc always returns the other process id in %o0, and
+ * a boolean flag whether this is the child or the parent in
+ * %o1. Inline assembly is needed to get the flag returned
+ * in %o1.
+ */
+ int in_child, child_pid, error;
+
+ asm volatile("mov %3, %%g1\n\t"
+ "mov %4, %%o0\n\t"
+ "mov 0 , %%o1\n\t"
+#if defined(__arch64__)
+ "t 0x6d\n\t"
+#else
+ "t 0x10\n\t"
+#endif
+ "addx %%g0, 0, %2\n\t"
+ "mov %%o1, %0\n\t"
+ "mov %%o0, %1" :
+ "=r"(in_child), "=r"(child_pid), "=r"(error) :
+ "i"(__NR_clone), "r"(flags) :
+ "%o1", "%o0", "%g1", "cc" );
+
+ /* When 'error' is set, the value returned in %o0 is used as the errno value */
+ if (error) {
+ errno = child_pid;
+ ret = -1;
+ } else
+ ret = in_child ? 0 : child_pid;
+ }
+#else
+ ret = (pid_t) syscall(__NR_clone, flags, NULL);
+#endif
+
+ /* In the child, reset_cached_pid() is called; presumably this invalidates a PID cached before the
+ * clone, which would otherwise still name the parent. */
+ if (ret == 0)
+ reset_cached_pid();
+
+ return ret;
+}
diff --git a/src/basic/raw-reboot.h b/src/basic/raw-reboot.h
new file mode 100644
index 0000000..8ecefe9
--- /dev/null
+++ b/src/basic/raw-reboot.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/reboot.h>
+#include <sys/reboot.h>
+#include <sys/syscall.h>
+
+/* glibc defines the reboot() API call, which is a wrapper around the system call of the same name, but without the
+ * extra "arg" parameter. Since we need that parameter for some calls, let's add a "raw" wrapper that is defined the
+ * same way, except it takes the additional argument. */
+
+/* Invokes the reboot system call directly, passing the two magic numbers, the command and the extra
+ * argument that glibc's reboot() wrapper does not expose. Returns the syscall() result converted to int. */
+static inline int raw_reboot(int cmd, const void *arg) {
+        long rc;
+
+        rc = syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, arg);
+
+        return (int) rc;
+}
diff --git a/src/basic/refcnt.h b/src/basic/refcnt.h
new file mode 100644
index 0000000..40f9a84
--- /dev/null
+++ b/src/basic/refcnt.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* A type-safe atomic refcounter.
+ *
+ * DO NOT USE THIS UNLESS YOU ACTUALLY CARE ABOUT THREAD SAFETY! */
+
+/* The counter is wrapped in a struct so that it cannot be mixed up with a plain integer; access it through
+ * the REFCNT_*() macros only. */
+typedef struct {
+ volatile unsigned _value;
+} RefCount;
+
+/* REFCNT_GET() is a plain read of the counter. REFCNT_INC()/REFCNT_DEC() atomically add/subtract one via the
+ * __sync builtins and evaluate to the new counter value. All three take the RefCount object itself, not a
+ * pointer to it. */
+#define REFCNT_GET(r) ((r)._value)
+#define REFCNT_INC(r) (__sync_add_and_fetch(&(r)._value, 1))
+#define REFCNT_DEC(r) (__sync_sub_and_fetch(&(r)._value, 1))
+
+/* Initializer for a fresh object: the counter starts at 1 */
+#define REFCNT_INIT ((RefCount) { ._value = 1 })
+
+/* Generates "<name>_ref()": returns NULL for a NULL argument, otherwise atomically increments p->n_ref and
+ * returns p. The assertion requires the new count to be at least 2, i.e. the object must already have held
+ * a reference. 'scope' is placed in front of the definition. */
+#define _DEFINE_ATOMIC_REF_FUNC(type, name, scope) \
+ scope type *name##_ref(type *p) { \
+ if (!p) \
+ return NULL; \
+ \
+ assert_se(REFCNT_INC(p->n_ref) >= 2); \
+ return p; \
+ }
+
+/* Generates "<name>_unref()": returns NULL for a NULL argument, otherwise atomically decrements p->n_ref.
+ * While references remain NULL is returned; when the count reaches 0 the result of free_func(p) is
+ * returned. 'scope' is placed in front of the definition. */
+#define _DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func, scope) \
+ scope type *name##_unref(type *p) { \
+ if (!p) \
+ return NULL; \
+ \
+ if (REFCNT_DEC(p->n_ref) > 0) \
+ return NULL; \
+ \
+ return free_func(p); \
+ }
+
+/* Front ends for the two generator macros above. The plain variants pass an empty scope, the PUBLIC ones
+ * pass _public_. */
+#define DEFINE_ATOMIC_REF_FUNC(type, name) \
+ _DEFINE_ATOMIC_REF_FUNC(type, name,)
+#define DEFINE_PUBLIC_ATOMIC_REF_FUNC(type, name) \
+ _DEFINE_ATOMIC_REF_FUNC(type, name, _public_)
+
+#define DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func) \
+ _DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func,)
+#define DEFINE_PUBLIC_ATOMIC_UNREF_FUNC(type, name, free_func) \
+ _DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func, _public_)
+
+/* Generate both the _ref() and the _unref() function in one go */
+#define DEFINE_ATOMIC_REF_UNREF_FUNC(type, name, free_func) \
+ DEFINE_ATOMIC_REF_FUNC(type, name); \
+ DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func);
+
+#define DEFINE_PUBLIC_ATOMIC_REF_UNREF_FUNC(type, name, free_func) \
+ DEFINE_PUBLIC_ATOMIC_REF_FUNC(type, name); \
+ DEFINE_PUBLIC_ATOMIC_UNREF_FUNC(type, name, free_func);
diff --git a/src/basic/replace-var.c b/src/basic/replace-var.c
new file mode 100644
index 0000000..fd2b5c1
--- /dev/null
+++ b/src/basic/replace-var.c
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "replace-var.h"
+#include "string-util.h"
+
+/*
+ * Generic infrastructure for replacing @FOO@ style variables in
+ * strings. Will call a callback for each replacement.
+ */
+
+/* Checks whether the string at b starts with an "@NAME@" reference, where NAME is a non-empty run of
+ * upper-case letters and underscores.
+ *
+ * Returns 1 and stores a newly allocated copy of NAME (without the '@' characters) in *r if so, 0 if b does
+ * not start with such a reference (*r is left untouched), or -ENOMEM if the copy cannot be allocated. */
+static int get_variable(const char *b, char **r) {
+        size_t name_len;
+        char *name;
+
+        assert(b);
+        assert(r);
+
+        if (b[0] != '@')
+                return 0;
+
+        name_len = strspn(b + 1, UPPERCASE_LETTERS "_");
+        if (name_len == 0)
+                return 0;
+
+        /* The name must be terminated by a second '@' */
+        if (b[name_len + 1] != '@')
+                return 0;
+
+        name = strndup(b + 1, name_len);
+        if (!name)
+                return -ENOMEM;
+
+        *r = name;
+        return 1;
+}
+
+/* Returns a newly allocated copy of 'text' in which every "@NAME@" reference is replaced by the string
+ * that lookup(NAME, userdata) returns. The string returned by the callback is freed here.
+ *
+ * Returns NULL if any allocation fails or if the callback returns NULL. */
+char *replace_var(const char *text, char *(*lookup)(const char *variable, void *userdata), void *userdata) {
+        const char *in;
+        size_t cap, used = 0;
+        char *out;
+
+        assert(text);
+        assert(lookup);
+
+        /* Start out with room for an unmodified copy; the buffer is resized on each replacement */
+        cap = strlen(text);
+        out = new(char, cap+1);
+        if (!out)
+                return NULL;
+
+        for (in = text; *in;) {
+                _cleanup_free_ char *variable = NULL, *replacement = NULL;
+                size_t consumed, new_cap;
+                char *resized;
+                int k;
+
+                k = get_variable(in, &variable);
+                if (k < 0)
+                        return mfree(out);
+                if (k == 0) {
+                        /* Not a variable reference, copy the character verbatim */
+                        out[used++] = *(in++);
+                        continue;
+                }
+
+                replacement = lookup(variable, userdata);
+                if (!replacement)
+                        return mfree(out);
+
+                /* The reference occupies the name plus the two enclosing '@' in the input */
+                consumed = strlen(variable) + 2;
+
+                new_cap = cap - consumed + strlen(replacement);
+                resized = realloc(out, new_cap + 1);
+                if (!resized)
+                        return mfree(out);
+
+                out = resized;
+                cap = new_cap;
+
+                used = stpcpy(out + used, replacement) - out;
+                in += consumed;
+        }
+
+        out[used] = 0;
+        return out;
+}
diff --git a/src/basic/replace-var.h b/src/basic/replace-var.h
new file mode 100644
index 0000000..e6a489f
--- /dev/null
+++ b/src/basic/replace-var.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+char *replace_var(const char *text, char *(*lookup)(const char *variable, void *userdata), void *userdata);
diff --git a/src/basic/rlimit-util.c b/src/basic/rlimit-util.c
new file mode 100644
index 0000000..74b3a02
--- /dev/null
+++ b/src/basic/rlimit-util.c
@@ -0,0 +1,410 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sys/resource.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "rlimit-util.h"
+#include "string-table.h"
+#include "time-util.h"
+
+/* Applies *rlim to 'resource'. If that is refused with EPERM, applies the closest permitted limit instead:
+ * both the soft and the hard value are capped at the current hard limit.
+ *
+ * Returns 0 on success (including when the capped values equal what is already in effect), -EPERM if the
+ * request was refused although the current hard limit is unbounded, or -errno of the failing
+ * setrlimit()/getrlimit() call. */
+int setrlimit_closest(int resource, const struct rlimit *rlim) {
+        struct rlimit current, capped;
+
+        assert(rlim);
+
+        if (setrlimit(resource, rlim) >= 0)
+                return 0;
+        if (errno != EPERM)
+                return -errno;
+
+        /* We may not set what was asked for, hence find out how far we may go */
+        if (getrlimit(resource, &current) < 0)
+                return -errno;
+
+        /* With an unbounded hard limit the EPERM must have had another reason, propagate it as is */
+        if (current.rlim_max == RLIM_INFINITY)
+                return -EPERM;
+
+        capped.rlim_cur = MIN(rlim->rlim_cur, current.rlim_max);
+        capped.rlim_max = MIN(rlim->rlim_max, current.rlim_max);
+
+        /* Nothing to do if this is what is configured already */
+        if (capped.rlim_cur == current.rlim_cur &&
+            capped.rlim_max == current.rlim_max)
+                return 0;
+
+        if (setrlimit(resource, &capped) < 0)
+                return -errno;
+
+        return 0;
+}
+
+/* Calls setrlimit_closest() for every non-NULL entry of the array 'rlim', which is indexed by resource
+ * number and has _RLIMIT_MAX slots. Stops at the first failure.
+ *
+ * which_failed: if non-NULL, receives the index of the resource that failed, or -1 if all succeeded.
+ *
+ * Returns 0 on success, or the negative error returned for the failing resource. */
+int setrlimit_closest_all(const struct rlimit *const *rlim, int *which_failed) {
+        int resource;
+
+        assert(rlim);
+
+        for (resource = 0; resource < _RLIMIT_MAX; resource++) {
+                int r;
+
+                if (!rlim[resource])
+                        continue;
+
+                r = setrlimit_closest(resource, rlim[resource]);
+                if (r >= 0)
+                        continue;
+
+                if (which_failed)
+                        *which_failed = resource;
+
+                return r;
+        }
+
+        if (which_failed)
+                *which_failed = -1;
+
+        return 0;
+}
+
+/* Parses a resource limit given as a plain unsigned integer, or as the literal "infinity" which maps to
+ * RLIM_INFINITY. Returns 0 on success, -ERANGE if the number is RLIM_INFINITY or larger, or the negative
+ * error from safe_atou64(). */
+static int rlimit_parse_u64(const char *val, rlim_t *ret) {
+        uint64_t parsed;
+        int r;
+
+        assert(val);
+        assert(ret);
+
+        /* setrlimit(2) suggests rlim_t is always 64bit on Linux. */
+        assert_cc(sizeof(rlim_t) == sizeof(uint64_t));
+
+        if (!streq(val, "infinity")) {
+                r = safe_atou64(val, &parsed);
+                if (r < 0)
+                        return r;
+                if (parsed >= (uint64_t) RLIM_INFINITY)
+                        return -ERANGE;
+
+                *ret = (rlim_t) parsed;
+        } else
+                *ret = RLIM_INFINITY;
+
+        return 0;
+}
+
+/* Parses a resource limit given as a size, using parse_size() with base 1024, or as the literal "infinity"
+ * which maps to RLIM_INFINITY. Returns 0 on success, -ERANGE if the size is RLIM_INFINITY or larger, or the
+ * negative error from parse_size(). */
+static int rlimit_parse_size(const char *val, rlim_t *ret) {
+        uint64_t bytes;
+        int r;
+
+        assert(val);
+        assert(ret);
+
+        if (!streq(val, "infinity")) {
+                r = parse_size(val, 1024, &bytes);
+                if (r < 0)
+                        return r;
+                if (bytes >= (uint64_t) RLIM_INFINITY)
+                        return -ERANGE;
+
+                *ret = (rlim_t) bytes;
+        } else
+                *ret = RLIM_INFINITY;
+
+        return 0;
+}
+
+/* Parses a resource limit given as a time span and converts it to whole seconds, rounding up. The literal
+ * "infinity", as well as any input parse_sec() turns into USEC_INFINITY, maps to RLIM_INFINITY. Returns 0
+ * on success, -ERANGE if the number of seconds is RLIM_INFINITY or larger, or the negative error from
+ * parse_sec(). */
+static int rlimit_parse_sec(const char *val, rlim_t *ret) {
+        usec_t t = USEC_INFINITY;
+        uint64_t seconds;
+        int r;
+
+        assert(val);
+        assert(ret);
+
+        if (!streq(val, "infinity")) {
+                r = parse_sec(val, &t);
+                if (r < 0)
+                        return r;
+        }
+
+        if (t == USEC_INFINITY) {
+                *ret = RLIM_INFINITY;
+                return 0;
+        }
+
+        seconds = (uint64_t) DIV_ROUND_UP(t, USEC_PER_SEC);
+        if (seconds >= (uint64_t) RLIM_INFINITY)
+                return -ERANGE;
+
+        *ret = (rlim_t) seconds;
+        return 0;
+}
+
+/* Parses a resource limit given as a time span; the value is passed through parse_time() with a default
+ * unit multiplier of 1 and stored without further scaling. The literal "infinity", as well as any input
+ * that parses to USEC_INFINITY, maps to RLIM_INFINITY. Returns 0 on success or the negative error from
+ * parse_time(). */
+static int rlimit_parse_usec(const char *val, rlim_t *ret) {
+        usec_t t = USEC_INFINITY;
+        int r;
+
+        assert(val);
+        assert(ret);
+
+        if (!streq(val, "infinity")) {
+                r = parse_time(val, &t, 1);
+                if (r < 0)
+                        return r;
+        }
+
+        if (t == USEC_INFINITY)
+                *ret = RLIM_INFINITY;
+        else
+                *ret = (rlim_t) t;
+
+        return 0;
+}
+
+/* Parses an RLIMIT_NICE value, in one of three notations selected by the first character:
+ *
+ *   "+N"  positive nice level, 0..19, stored as 20 - N
+ *   "-N"  negative nice level, 0..20, stored as 20 + N
+ *   "N"   raw resource limit value, 0..40, stored as is
+ *
+ * The raw range of RLIMIT_NICE is 40..1, corresponding to the nice levels -20..19. The kernel's default of
+ * 0 corresponds to no valid nice level, which is why it can only be expressed in the raw notation.
+ *
+ * Returns 0 on success, -ERANGE if the number is outside of the range of its notation, or the negative
+ * error from safe_atou64(). */
+static int rlimit_parse_nice(const char *val, rlim_t *ret) {
+        uint64_t rl;
+        int r;
+
+        switch (val[0]) {
+
+        case '+':
+                /* User-friendly positive nice level */
+                r = safe_atou64(val + 1, &rl);
+                if (r < 0)
+                        return r;
+
+                if (rl >= PRIO_MAX)
+                        return -ERANGE;
+
+                rl = 20 - rl;
+                break;
+
+        case '-':
+                /* User-friendly negative nice level */
+                r = safe_atou64(val + 1, &rl);
+                if (r < 0)
+                        return r;
+
+                if (rl > (uint64_t) (-PRIO_MIN))
+                        return -ERANGE;
+
+                rl = 20 + rl;
+                break;
+
+        default:
+                /* No sign: raw resource limit value */
+                r = safe_atou64(val, &rl);
+                if (r < 0)
+                        return r;
+
+                if (rl > (uint64_t) (20 - PRIO_MIN))
+                        return -ERANGE;
+
+                break;
+        }
+
+        *ret = (rlim_t) rl;
+        return 0;
+}
+
+/* Maps each RLIMIT_* resource number to the parser for its value notation: sizes for byte-valued limits,
+ * plain integers for counts, time spans for RLIMIT_CPU (seconds) and RLIMIT_RTTIME, and the special nice
+ * notation for RLIMIT_NICE. Used by rlimit_parse_one(). */
+static int (*const rlimit_parse_table[_RLIMIT_MAX])(const char *val, rlim_t *ret) = {
+ [RLIMIT_CPU] = rlimit_parse_sec,
+ [RLIMIT_FSIZE] = rlimit_parse_size,
+ [RLIMIT_DATA] = rlimit_parse_size,
+ [RLIMIT_STACK] = rlimit_parse_size,
+ [RLIMIT_CORE] = rlimit_parse_size,
+ [RLIMIT_RSS] = rlimit_parse_size,
+ [RLIMIT_NOFILE] = rlimit_parse_u64,
+ [RLIMIT_AS] = rlimit_parse_size,
+ [RLIMIT_NPROC] = rlimit_parse_u64,
+ [RLIMIT_MEMLOCK] = rlimit_parse_size,
+ [RLIMIT_LOCKS] = rlimit_parse_u64,
+ [RLIMIT_SIGPENDING] = rlimit_parse_u64,
+ [RLIMIT_MSGQUEUE] = rlimit_parse_size,
+ [RLIMIT_NICE] = rlimit_parse_nice,
+ [RLIMIT_RTPRIO] = rlimit_parse_u64,
+ [RLIMIT_RTTIME] = rlimit_parse_usec,
+};
+
+/* Parses a single limit value for 'resource' with the parser registered for it in rlimit_parse_table.
+ * Returns -EINVAL if 'resource' is outside of [0, _RLIMIT_MAX), otherwise whatever the parser returns. */
+int rlimit_parse_one(int resource, const char *val, rlim_t *ret) {
+        assert(val);
+        assert(ret);
+
+        if (resource < 0 || resource >= _RLIMIT_MAX)
+                return -EINVAL;
+
+        return rlimit_parse_table[resource](val, ret);
+}
+
+/* Parses a limit specification of the form "VALUE" or "SOFT:HARD" for 'resource' into *ret. With a single
+ * value both the soft and the hard limit are set to it.
+ *
+ * Returns 0 on success, -EINVAL if the string is empty or has more than two colon-separated parts, -EILSEQ
+ * if the soft limit is larger than the hard limit, or the negative error from splitting or from parsing
+ * one of the values. *ret is only written on success. */
+int rlimit_parse(int resource, const char *val, struct rlimit *ret) {
+        _cleanup_free_ char *soft_text = NULL, *hard_text = NULL;
+        rlim_t soft_limit, hard_limit;
+        int r;
+
+        assert(val);
+        assert(ret);
+
+        r = extract_first_word(&val, &soft_text, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return -EINVAL;
+
+        r = rlimit_parse_one(resource, soft_text, &soft_limit);
+        if (r < 0)
+                return r;
+
+        r = extract_first_word(&val, &hard_text, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+        if (r < 0)
+                return r;
+        if (!isempty(val)) /* anything left after the second part? */
+                return -EINVAL;
+
+        if (r > 0) {
+                r = rlimit_parse_one(resource, hard_text, &hard_limit);
+                if (r < 0)
+                        return r;
+                if (soft_limit > hard_limit)
+                        return -EILSEQ;
+        } else
+                hard_limit = soft_limit;
+
+        ret->rlim_cur = soft_limit;
+        ret->rlim_max = hard_limit;
+
+        return 0;
+}
+
+/* Formats *rl as a newly allocated string and stores it in *ret; the caller owns and frees it.
+ *
+ * The result is "infinity" if both limits are unbounded, a single number if both are equal, and
+ * "SOFT:HARD" otherwise, with "infinity" standing in for an unbounded value.
+ *
+ * Returns 0 on success, -ENOMEM if the string cannot be allocated. *ret is only written on success. */
+int rlimit_format(const struct rlimit *rl, char **ret) {
+        char *s = NULL;
+        int k;
+
+        assert(rl);
+        assert(ret);
+
+        /* Judge success by the return value rather than by the pointer: the pointer is not guaranteed to
+         * be left at NULL when asprintf() fails. */
+        if (rl->rlim_cur >= RLIM_INFINITY && rl->rlim_max >= RLIM_INFINITY) {
+                s = strdup("infinity");
+                k = s ? 0 : -1;
+        } else if (rl->rlim_cur >= RLIM_INFINITY)
+                k = asprintf(&s, "infinity:" RLIM_FMT, rl->rlim_max);
+        else if (rl->rlim_max >= RLIM_INFINITY)
+                k = asprintf(&s, RLIM_FMT ":infinity", rl->rlim_cur);
+        else if (rl->rlim_cur == rl->rlim_max)
+                k = asprintf(&s, RLIM_FMT, rl->rlim_cur);
+        else
+                k = asprintf(&s, RLIM_FMT ":" RLIM_FMT, rl->rlim_cur, rl->rlim_max);
+
+        if (k < 0)
+                return -ENOMEM;
+
+        *ret = s;
+        return 0;
+}
+
+/* Names of the resource limits, indexed by RLIMIT_* number, without the "RLIMIT_" prefix. */
+static const char* const rlimit_table[_RLIMIT_MAX] = {
+ [RLIMIT_AS] = "AS",
+ [RLIMIT_CORE] = "CORE",
+ [RLIMIT_CPU] = "CPU",
+ [RLIMIT_DATA] = "DATA",
+ [RLIMIT_FSIZE] = "FSIZE",
+ [RLIMIT_LOCKS] = "LOCKS",
+ [RLIMIT_MEMLOCK] = "MEMLOCK",
+ [RLIMIT_MSGQUEUE] = "MSGQUEUE",
+ [RLIMIT_NICE] = "NICE",
+ [RLIMIT_NOFILE] = "NOFILE",
+ [RLIMIT_NPROC] = "NPROC",
+ [RLIMIT_RSS] = "RSS",
+ [RLIMIT_RTPRIO] = "RTPRIO",
+ [RLIMIT_RTTIME] = "RTTIME",
+ [RLIMIT_SIGPENDING] = "SIGPENDING",
+ [RLIMIT_STACK] = "STACK",
+};
+
+/* Presumably expands to the rlimit_to_string() and rlimit_from_string() functions declared in
+ * rlimit-util.h, operating on the table above. */
+DEFINE_STRING_TABLE_LOOKUP(rlimit, int);
+
+/* Like rlimit_from_string(), but first strips an optional prefix from the name: either the official
+ * "RLIMIT_" or the "Limit" prefix of our unit file settings. Returns whatever rlimit_from_string() returns
+ * for the remainder. */
+int rlimit_from_string_harder(const char *s) {
+        const char *name;
+
+        name = startswith(s, "RLIMIT_");
+        if (!name)
+                name = startswith(s, "Limit");
+
+        return rlimit_from_string(name ? name : s);
+}
+
+/* Frees every entry of the array 'rl', which has _RLIMIT_MAX slots, and resets each slot to NULL. The
+ * array itself is not freed. A NULL array is ignored. */
+void rlimit_free_all(struct rlimit **rl) {
+        struct rlimit **slot;
+
+        if (!rl)
+                return;
+
+        for (slot = rl; slot < rl + _RLIMIT_MAX; slot++)
+                *slot = mfree(*slot);
+}
+
+/* Raises RLIMIT_NOFILE as close as possible to 'limit', using setrlimit_closest() with the same value for
+ * the soft and the hard limit. A negative 'limit' selects the value returned by read_nr_open(); anything
+ * below 3 is raised to 3.
+ *
+ * Intended for programs that may need many fds and cope with high fd numbers (i.e. do not use select(),
+ * which chokes on fds >= 1024).
+ *
+ * Returns 0 on success, or the negative error from setrlimit_closest() after logging it at debug level. */
+int rlimit_nofile_bump(int limit) {
+        int target, r;
+
+        target = limit < 0 ? read_nr_open() : limit;
+        if (target < 3)
+                target = 3;
+
+        r = setrlimit_closest(RLIMIT_NOFILE, &RLIMIT_MAKE_CONST(target));
+        if (r < 0)
+                return log_debug_errno(r, "Failed to set RLIMIT_NOFILE: %m");
+
+        return 0;
+}
+
+/* Lowers the soft RLIMIT_NOFILE limit to FD_SETSIZE (i.e. 1024) if it is currently higher, for
+ * compatibility with software still using select(). The hard limit is left as it is.
+ *
+ * Returns 1 if the limit was lowered, 0 if it was low enough already, or the result of log_debug_errno()
+ * for the failing getrlimit()/setrlimit() call. */
+int rlimit_nofile_safe(void) {
+        struct rlimit rl;
+
+        if (getrlimit(RLIMIT_NOFILE, &rl) < 0)
+                return log_debug_errno(errno, "Failed to query RLIMIT_NOFILE: %m");
+
+        if (rl.rlim_cur > FD_SETSIZE) {
+                rl.rlim_cur = FD_SETSIZE;
+
+                if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
+                        return log_debug_errno(errno, "Failed to lower RLIMIT_NOFILE's soft limit to " RLIM_FMT ": %m", rl.rlim_cur);
+
+                return 1;
+        }
+
+        return 0;
+}
diff --git a/src/basic/rlimit-util.h b/src/basic/rlimit-util.h
new file mode 100644
index 0000000..d4fca2b
--- /dev/null
+++ b/src/basic/rlimit-util.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/resource.h>
+
+#include "macro.h"
+
+const char *rlimit_to_string(int i) _const_;
+int rlimit_from_string(const char *s) _pure_;
+int rlimit_from_string_harder(const char *s) _pure_;
+
+int setrlimit_closest(int resource, const struct rlimit *rlim);
+int setrlimit_closest_all(const struct rlimit * const *rlim, int *which_failed);
+
+int rlimit_parse_one(int resource, const char *val, rlim_t *ret);
+int rlimit_parse(int resource, const char *val, struct rlimit *ret);
+
+int rlimit_format(const struct rlimit *rl, char **ret);
+
+void rlimit_free_all(struct rlimit **rl);
+
+#define RLIMIT_MAKE_CONST(lim) ((struct rlimit) { lim, lim })
+
+int rlimit_nofile_bump(int limit);
+int rlimit_nofile_safe(void);
diff --git a/src/basic/rm-rf.c b/src/basic/rm-rf.c
new file mode 100644
index 0000000..0c957c9
--- /dev/null
+++ b/src/basic/rm-rf.c
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "cgroup-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stat-util.h"
+#include "string-util.h"
+
+static bool is_physical_fs(const struct statfs *sfs) {
+        /* Anything that is neither a temporary nor a cgroup file system counts as "physical" */
+        if (is_temporary_fs(sfs))
+                return false;
+
+        return !is_cgroup_fs(sfs);
+}
+
+int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int ret = 0, r;
+ struct statfs sfs;
+
+ assert(fd >= 0);
+
+ /* Removes everything below the directory 'fd' refers to, recursing into
+ * subdirectories. The directory 'fd' refers to is not removed itself.
+ *
+ * If 'root_dev' is non-NULL, subdirectories whose st_dev differs from
+ * root_dev->st_dev are skipped.
+ *
+ * This returns the first error we run into, but nevertheless
+ * tries to go on. This closes the passed fd. */
+
+ if (!(flags & REMOVE_PHYSICAL)) {
+
+ r = fstatfs(fd, &sfs);
+ if (r < 0) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ if (is_physical_fs(&sfs)) {
+ /* We refuse to clean physical file systems with this call,
+ * unless explicitly requested. This is extra paranoia just
+ * to be sure we never ever remove non-state data. */
+ _cleanup_free_ char *path = NULL;
+
+ (void) fd_get_path(fd, &path);
+ log_error("Attempted to remove disk file system under \"%s\", and we can't allow that.",
+ strna(path));
+
+ safe_close(fd);
+ return -EPERM;
+ }
+ }
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return errno == ENOENT ? 0 : -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ bool is_dir;
+ struct stat st;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (de->d_type == DT_UNKNOWN ||
+ (de->d_type == DT_DIR && (root_dev || (flags & REMOVE_SUBVOLUME)))) {
+ if (fstatat(fd, de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ if (ret == 0 && errno != ENOENT)
+ ret = -errno;
+ continue;
+ }
+
+ is_dir = S_ISDIR(st.st_mode);
+ } else
+ is_dir = de->d_type == DT_DIR;
+
+ if (is_dir) {
+ int subdir_fd;
+
+ /* if root_dev is set, remove subdirectories only if device is same.
+ * 'st' is initialized here: with root_dev set, every entry that can
+ * turn out to be a directory went through the fstatat() above. */
+ if (root_dev && st.st_dev != root_dev->st_dev)
+ continue;
+
+ subdir_fd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ if (subdir_fd < 0) {
+ if (ret == 0 && errno != ENOENT)
+ ret = -errno;
+ continue;
+ }
+
+ /* Stop at mount points */
+ r = fd_is_mount_point(fd, de->d_name, 0);
+ if (r < 0) {
+ if (ret == 0 && r != -ENOENT)
+ ret = r;
+
+ safe_close(subdir_fd);
+ continue;
+ }
+ if (r) {
+ safe_close(subdir_fd);
+ continue;
+ }
+
+ if ((flags & REMOVE_SUBVOLUME) && st.st_ino == 256) {
+
+ /* This could be a subvolume, try to remove it. (Inode number 256 is
+ * presumably what btrfs assigns to subvolume roots — TODO confirm. 'st'
+ * is initialized here, since REMOVE_SUBVOLUME forces the fstatat() above
+ * for directories.) */
+
+ r = btrfs_subvol_remove_fd(fd, de->d_name, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
+ if (r < 0) {
+ if (!IN_SET(r, -ENOTTY, -EINVAL)) {
+ if (ret == 0)
+ ret = r;
+
+ safe_close(subdir_fd);
+ continue;
+ }
+
+ /* ENOTTY, then it wasn't a
+ * btrfs subvolume, continue
+ * below. */
+ } else {
+ /* It was a subvolume, continue. */
+ safe_close(subdir_fd);
+ continue;
+ }
+ }
+
+ /* We pass REMOVE_PHYSICAL here, to avoid
+ * doing the fstatfs() to check the file
+ * system type again for each directory.
+ * The recursive call takes over subdir_fd. */
+ r = rm_rf_children(subdir_fd, flags | REMOVE_PHYSICAL, root_dev);
+ if (r < 0 && ret == 0)
+ ret = r;
+
+ if (unlinkat(fd, de->d_name, AT_REMOVEDIR) < 0) {
+ if (ret == 0 && errno != ENOENT)
+ ret = -errno;
+ }
+
+ } else if (!(flags & REMOVE_ONLY_DIRECTORIES)) {
+
+ if (unlinkat(fd, de->d_name, 0) < 0) {
+ if (ret == 0 && errno != ENOENT)
+ ret = -errno;
+ }
+ }
+ }
+ return ret;
+}
+
+int rm_rf(const char *path, RemoveFlags flags) {
+ int fd, r;
+ struct statfs s;
+
+ assert(path);
+
+ /* Recursively removes what is below 'path' and, if REMOVE_ROOT is set, 'path'
+ * itself. If 'path' cannot be opened as a directory (open() fails with ENOTDIR
+ * or ELOOP) it is unlink()ed instead, but only if REMOVE_ROOT is set and
+ * REMOVE_ONLY_DIRECTORIES is not.
+ *
+ * We refuse to clean the root file system with this
+ * call. This is extra paranoia to never cause a really
+ * seriously broken system. */
+ if (path_equal_or_files_same(path, "/", AT_SYMLINK_NOFOLLOW))
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Attempted to remove entire root file system (\"%s\"), and we can't allow that.",
+ path);
+
+ if (FLAGS_SET(flags, REMOVE_SUBVOLUME | REMOVE_ROOT | REMOVE_PHYSICAL)) {
+ /* Try to remove as subvolume first */
+ r = btrfs_subvol_remove(path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
+ if (r >= 0)
+ return r;
+
+ if (!IN_SET(r, -ENOTTY, -EINVAL, -ENOTDIR))
+ return r;
+
+ /* Not btrfs or not a subvolume, fall back to the regular removal below */
+ }
+
+ fd = open(path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ if (fd < 0) {
+ if (!IN_SET(errno, ENOTDIR, ELOOP))
+ return -errno;
+
+ if (!(flags & REMOVE_PHYSICAL)) {
+ if (statfs(path, &s) < 0)
+ return -errno;
+
+ if (is_physical_fs(&s))
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Attempted to remove files from a disk file system under \"%s\", refusing.",
+ path);
+ }
+
+ if ((flags & REMOVE_ROOT) && !(flags & REMOVE_ONLY_DIRECTORIES))
+ if (unlink(path) < 0 && errno != ENOENT)
+ return -errno;
+
+ return 0;
+ }
+
+ r = rm_rf_children(fd, flags, NULL);
+
+ if (flags & REMOVE_ROOT) {
+ if (rmdir(path) < 0) {
+ if (r == 0 && errno != ENOENT)
+ r = -errno;
+ }
+ }
+
+ return r;
+}
diff --git a/src/basic/rm-rf.h b/src/basic/rm-rf.h
new file mode 100644
index 0000000..3ee2b97
--- /dev/null
+++ b/src/basic/rm-rf.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/stat.h>
+
+#include "util.h"
+
+typedef enum RemoveFlags {
+ REMOVE_ONLY_DIRECTORIES = 1 << 0, /* only remove directories, leave all other directory entries in place */
+ REMOVE_ROOT = 1 << 1, /* rm_rf() also removes the specified path itself, not just what is below it */
+ REMOVE_PHYSICAL = 1 << 2, /* if not set, only removes files on temporary (e.g. tmpfs) and cgroup file systems, never physical file systems */
+ REMOVE_SUBVOLUME = 1 << 3, /* try to remove btrfs subvolumes as such, via btrfs_subvol_remove() and btrfs_subvol_remove_fd() */
+} RemoveFlags;
+
+int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev);
+int rm_rf(const char *path, RemoveFlags flags);
+
+/* Useful for usage with _cleanup_(), destroys a directory and frees the pointer */
+static inline void rm_rf_physical_and_free(char *p) {
+ PROTECT_ERRNO;
+ (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL);
+ free(p);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rm_rf_physical_and_free);
+
+/* Similar as above, but also has magic btrfs subvolume powers */
+static inline void rm_rf_subvolume_and_free(char *p) {
+ PROTECT_ERRNO;
+ (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ free(p);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rm_rf_subvolume_and_free);
diff --git a/src/basic/selinux-util.c b/src/basic/selinux-util.c
new file mode 100644
index 0000000..dc06f3d
--- /dev/null
+++ b/src/basic/selinux-util.c
@@ -0,0 +1,518 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <malloc.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/un.h>
+#include <syslog.h>
+
+#if HAVE_SELINUX
+#include <selinux/context.h>
+#include <selinux/label.h>
+#include <selinux/selinux.h>
+#endif
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "stdio-util.h"
+#include "time-util.h"
+#include "util.h"
+
+#if HAVE_SELINUX
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, freecon);
+DEFINE_TRIVIAL_CLEANUP_FUNC(context_t, context_free);
+
+#define _cleanup_freecon_ _cleanup_(freeconp)
+#define _cleanup_context_free_ _cleanup_(context_freep)
+
+static int cached_use = -1;
+static struct selabel_handle *label_hnd = NULL;
+
+#define log_enforcing(...) log_full(security_getenforce() == 1 ? LOG_ERR : LOG_DEBUG, __VA_ARGS__)
+#define log_enforcing_errno(r, ...) log_full_errno(security_getenforce() == 1 ? LOG_ERR : LOG_DEBUG, r, __VA_ARGS__)
+#endif
+
+bool mac_selinux_use(void) {
+#if HAVE_SELINUX
+        /* Query libselinux only once and remember the answer until mac_selinux_retest() resets the cache */
+        if (cached_use >= 0)
+                return cached_use;
+
+        cached_use = is_selinux_enabled() > 0;
+        return cached_use;
+#else
+        return false;
+#endif
+}
+
+void mac_selinux_retest(void) {
+#if HAVE_SELINUX
+ cached_use = -1;
+#endif
+}
+
+int mac_selinux_init(void) {
+        int r = 0;
+
+        /* Opens the SELinux file context labeling handle used by the other calls in this file. Does nothing
+         * if the handle is already open, if SELinux is not in use, or if built without SELinux. Returns 0 on
+         * success or when the failure is tolerated because SELinux is not enforcing, and a negative
+         * errno-style error if opening the handle fails while enforcing. */
+
+#if HAVE_SELINUX
+        usec_t before_timestamp, after_timestamp;
+        struct mallinfo before_mallinfo, after_mallinfo;
+
+        if (label_hnd)
+                return 0;
+
+        if (!mac_selinux_use())
+                return 0;
+
+        before_mallinfo = mallinfo();
+        before_timestamp = now(CLOCK_MONOTONIC);
+
+        label_hnd = selabel_open(SELABEL_CTX_FILE, NULL, 0);
+        if (!label_hnd) {
+                /* Save the error right away: logging and security_getenforce() may overwrite errno */
+                int e = -errno;
+
+                log_enforcing_errno(e, "Failed to initialize SELinux context: %m");
+                r = security_getenforce() == 1 ? e : 0;
+        } else {
+                char timespan[FORMAT_TIMESPAN_MAX];
+                int l;
+
+                after_timestamp = now(CLOCK_MONOTONIC);
+                after_mallinfo = mallinfo();
+
+                /* Heap growth caused by loading the database, clamped at zero */
+                l = after_mallinfo.uordblks > before_mallinfo.uordblks ? after_mallinfo.uordblks - before_mallinfo.uordblks : 0;
+
+                log_debug("Successfully loaded SELinux database in %s, size on heap is %iK.",
+                          format_timespan(timespan, sizeof(timespan), after_timestamp - before_timestamp, 0),
+                          (l+1023)/1024);
+        }
+#endif
+
+        return r;
+}
+
+void mac_selinux_finish(void) {
+
+#if HAVE_SELINUX
+ if (!label_hnd)
+ return;
+
+ selabel_close(label_hnd);
+ label_hnd = NULL;
+#endif
+}
+
+int mac_selinux_fix(const char *path, LabelFixFlags flags) {
+ /* Sets the SELinux label of 'path' to the one selabel_lookup_raw() reports for
+ * it. Failures are logged, but only returned as errors if security_getenforce()
+ * reports enforcing mode. Without SELinux support this always returns 0. */
+#if HAVE_SELINUX
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ _cleanup_freecon_ char* fcon = NULL;
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ int r;
+
+ assert(path);
+
+ /* if mac_selinux_init() wasn't called before we are a NOOP */
+ if (!label_hnd)
+ return 0;
+
+ /* Open the file as O_PATH, to pin it while we determine and adjust the label */
+ fd = open(path, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (fd < 0) {
+ if ((flags & LABEL_IGNORE_ENOENT) && errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (selabel_lookup_raw(label_hnd, &fcon, path, st.st_mode) < 0) {
+ r = -errno;
+
+ /* If there's no label to set, then exit without warning */
+ if (r == -ENOENT)
+ return 0;
+
+ goto fail;
+ }
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ if (setfilecon_raw(procfs_path, fcon) < 0) {
+ _cleanup_freecon_ char *oldcon = NULL;
+
+ r = -errno;
+
+ /* If the FS doesn't support labels, then exit without warning */
+ if (r == -EOPNOTSUPP)
+ return 0;
+
+ /* If the FS is read-only and we were told to ignore failures caused by that, suppress error */
+ if (r == -EROFS && (flags & LABEL_IGNORE_EROFS))
+ return 0;
+
+ /* If the old label is identical to the new one, suppress any kind of error */
+ if (getfilecon_raw(procfs_path, &oldcon) >= 0 && streq(fcon, oldcon))
+ return 0;
+
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ log_enforcing_errno(r, "Unable to fix SELinux security context of %s: %m", path);
+ if (security_getenforce() == 1)
+ return r;
+#endif
+
+ return 0;
+}
+
+int mac_selinux_apply(const char *path, const char *label) {
+
+        /* Sets the SELinux label of 'path' to 'label'. Does nothing if SELinux is not in use. A failure is
+         * logged, but only returned as negative errno-style error if SELinux is enforcing; otherwise 0 is
+         * returned. */
+
+#if HAVE_SELINUX
+        if (!mac_selinux_use())
+                return 0;
+
+        assert(path);
+        assert(label);
+
+        if (setfilecon(path, label) < 0) {
+                /* Save the error right away: logging and security_getenforce() may overwrite errno */
+                int e = -errno;
+
+                log_enforcing_errno(e, "Failed to set SELinux security context %s on path %s: %m", label, path);
+                if (security_getenforce() > 0)
+                        return e;
+        }
+#endif
+        return 0;
+}
+
+int mac_selinux_get_create_label_from_exe(const char *exe, char **label) {
+ int r = -EOPNOTSUPP;
+
+#if HAVE_SELINUX
+ _cleanup_freecon_ char *mycon = NULL, *fcon = NULL;
+ security_class_t sclass;
+
+ assert(exe);
+ assert(label);
+
+ if (!mac_selinux_use())
+ return -EOPNOTSUPP;
+
+ r = getcon_raw(&mycon);
+ if (r < 0)
+ return -errno;
+
+ r = getfilecon_raw(exe, &fcon);
+ if (r < 0)
+ return -errno;
+
+ sclass = string_to_security_class("process");
+ r = security_compute_create_raw(mycon, fcon, sclass, label);
+ if (r < 0)
+ return -errno;
+#endif
+
+ return r;
+}
+
+int mac_selinux_get_our_label(char **label) {
+ int r = -EOPNOTSUPP;
+
+ assert(label);
+
+#if HAVE_SELINUX
+ if (!mac_selinux_use())
+ return -EOPNOTSUPP;
+
+ r = getcon_raw(label);
+ if (r < 0)
+ return -errno;
+#endif
+
+ return r;
+}
+
+int mac_selinux_get_child_mls_label(int socket_fd, const char *exe, const char *exec_label, char **label) {
+ int r = -EOPNOTSUPP;
+ /* Computes the "process" class create label for a child: takes our own context
+ * (getcon_raw()), replaces its range by the range of the peer connected to
+ * 'socket_fd' (getpeercon_raw()), and passes the result to
+ * security_compute_create_raw(), which stores its output in *label. Returns
+ * -EOPNOTSUPP if SELinux is not in use or not compiled in. */
+#if HAVE_SELINUX
+ _cleanup_freecon_ char *mycon = NULL, *peercon = NULL, *fcon = NULL;
+ _cleanup_context_free_ context_t pcon = NULL, bcon = NULL;
+ security_class_t sclass;
+ const char *range = NULL;
+
+ assert(socket_fd >= 0);
+ assert(exe);
+ assert(label);
+
+ if (!mac_selinux_use())
+ return -EOPNOTSUPP;
+
+ r = getcon_raw(&mycon);
+ if (r < 0)
+ return -errno;
+
+ r = getpeercon_raw(socket_fd, &peercon);
+ if (r < 0)
+ return -errno;
+
+ if (!exec_label) {
+ /* If there is no context set for next exec let's use context
+ of target executable */
+ r = getfilecon_raw(exe, &fcon);
+ if (r < 0)
+ return -errno;
+ }
+
+ bcon = context_new(mycon);
+ if (!bcon)
+ return -ENOMEM;
+
+ pcon = context_new(peercon);
+ if (!pcon)
+ return -ENOMEM;
+
+ range = context_range_get(pcon);
+ if (!range)
+ return -errno;
+
+ r = context_range_set(bcon, range);
+ if (r)
+ return -errno;
+
+ freecon(mycon);
+ mycon = strdup(context_str(bcon));
+ if (!mycon)
+ return -ENOMEM;
+
+ sclass = string_to_security_class("process");
+ r = security_compute_create_raw(mycon, fcon, sclass, label); /* NOTE(review): if exec_label is non-NULL, fcon is still NULL here and exec_label itself is never used — confirm this is intended */
+ if (r < 0)
+ return -errno;
+#endif
+
+ return r;
+}
+
+char* mac_selinux_free(char *label) {
+
+#if HAVE_SELINUX
+ if (!label)
+ return NULL;
+
+ if (!mac_selinux_use())
+ return NULL;
+
+ freecon(label);
+#endif
+
+ return NULL;
+}
+
+#if HAVE_SELINUX
+/* Looks up the label the policy assigns to 'abspath' for a file of type 'mode' and passes it to
+ * setfscreatecon_raw(). Returns 0 on success, if the policy has no label for the path, or if a failure
+ * occurred while SELinux is not enforcing. Returns a negative errno-style error if a failure occurred while
+ * SELinux is enforcing. */
+static int selinux_create_file_prepare_abspath(const char *abspath, mode_t mode) {
+        _cleanup_freecon_ char *filecon = NULL;
+        int r;
+
+        assert(abspath);
+        assert(path_is_absolute(abspath));
+
+        if (selabel_lookup_raw(label_hnd, &filecon, abspath, mode) < 0) {
+                /* Save the error right away: logging and security_getenforce() may overwrite errno */
+                r = -errno;
+
+                /* No context specified by the policy? Proceed without setting it. */
+                if (r == -ENOENT)
+                        return 0;
+
+                log_enforcing_errno(r, "Failed to determine SELinux security context for %s: %m", abspath);
+        } else {
+                if (setfscreatecon_raw(filecon) >= 0)
+                        return 0; /* Success! */
+
+                r = -errno;
+                log_enforcing_errno(r, "Failed to set SELinux security context %s for %s: %m", filecon, abspath);
+        }
+
+        if (security_getenforce() > 0)
+                return r;
+
+        return 0;
+}
+#endif
+
+int mac_selinux_create_file_prepare_at(int dirfd, const char *path, mode_t mode) {
+ int r = 0;
+
+#if HAVE_SELINUX
+ _cleanup_free_ char *abspath = NULL;
+
+ assert(path);
+
+ if (!label_hnd)
+ return 0;
+
+ if (!path_is_absolute(path)) {
+ _cleanup_free_ char *p = NULL;
+
+ if (dirfd == AT_FDCWD)
+ r = safe_getcwd(&p);
+ else
+ r = fd_get_path(dirfd, &p);
+ if (r < 0)
+ return r;
+
+ path = abspath = path_join(p, path);
+ if (!path)
+ return -ENOMEM;
+ }
+
+ r = selinux_create_file_prepare_abspath(path, mode);
+#endif
+ return r;
+}
+
+int mac_selinux_create_file_prepare(const char *path, mode_t mode) {
+ int r = 0;
+
+#if HAVE_SELINUX
+ _cleanup_free_ char *abspath = NULL;
+
+ assert(path);
+
+ if (!label_hnd)
+ return 0;
+
+ r = path_make_absolute_cwd(path, &abspath);
+ if (r < 0)
+ return r;
+
+ r = selinux_create_file_prepare_abspath(abspath, mode);
+#endif
+ return r;
+}
+
+void mac_selinux_create_file_clear(void) {
+
+#if HAVE_SELINUX
+ PROTECT_ERRNO;
+
+ if (!mac_selinux_use())
+ return;
+
+ setfscreatecon_raw(NULL);
+#endif
+}
+
+int mac_selinux_create_socket_prepare(const char *label) {
+
+        /* Passes 'label' to setsockcreatecon(). Does nothing if SELinux is not in use. A failure is logged,
+         * but only returned as negative errno-style error if SELinux is enforcing; otherwise 0 is returned.
+         * mac_selinux_create_socket_clear() resets the setting again. */
+
+#if HAVE_SELINUX
+        if (!mac_selinux_use())
+                return 0;
+
+        assert(label);
+
+        if (setsockcreatecon(label) < 0) {
+                /* Save the error right away: logging and security_getenforce() may overwrite errno */
+                int e = -errno;
+
+                log_enforcing_errno(e, "Failed to set SELinux security context %s for sockets: %m", label);
+
+                if (security_getenforce() == 1)
+                        return e;
+        }
+#endif
+
+        return 0;
+}
+
+void mac_selinux_create_socket_clear(void) {
+
+#if HAVE_SELINUX
+ PROTECT_ERRNO;
+
+ if (!mac_selinux_use())
+ return;
+
+ setsockcreatecon_raw(NULL);
+#endif
+}
+
+int mac_selinux_bind(int fd, const struct sockaddr *addr, socklen_t addrlen) {
+
+        /* Binds a socket and labels its file system object according to the SELinux policy. Sockets that
+         * are not AF_UNIX, are anonymous or live in the abstract namespace, as well as paths the policy has
+         * no label for, are bound without any labeling. Labeling failures are logged, and only prevent the
+         * bind() if SELinux is enforcing. Returns 0 on success, a negative errno-style error otherwise. */
+
+#if HAVE_SELINUX
+        _cleanup_freecon_ char *fcon = NULL;
+        const struct sockaddr_un *un;
+        bool context_changed = false;
+        char *path;
+        int r;
+
+        assert(fd >= 0);
+        assert(addr);
+        assert(addrlen >= sizeof(sa_family_t));
+
+        if (!label_hnd)
+                goto skipped;
+
+        /* Filter out non-local sockets */
+        if (addr->sa_family != AF_UNIX)
+                goto skipped;
+
+        /* Filter out anonymous sockets */
+        if (addrlen < offsetof(struct sockaddr_un, sun_path) + 1)
+                goto skipped;
+
+        /* Filter out abstract namespace sockets */
+        un = (const struct sockaddr_un*) addr;
+        if (un->sun_path[0] == 0)
+                goto skipped;
+
+        path = strndupa(un->sun_path, addrlen - offsetof(struct sockaddr_un, sun_path));
+
+        /* In both branches the lookup result is converted into a negative errno right away, so that
+         * nothing that runs afterwards (cleanup handlers, logging, security_getenforce()) can change the
+         * error we act on. */
+        if (path_is_absolute(path))
+                r = selabel_lookup_raw(label_hnd, &fcon, path, S_IFSOCK) < 0 ? -errno : 0;
+        else {
+                _cleanup_free_ char *newpath = NULL;
+
+                r = path_make_absolute_cwd(path, &newpath);
+                if (r < 0)
+                        return r;
+
+                r = selabel_lookup_raw(label_hnd, &fcon, newpath, S_IFSOCK) < 0 ? -errno : 0;
+        }
+
+        if (r < 0) {
+                /* No context specified by the policy? Proceed without setting it */
+                if (r == -ENOENT)
+                        goto skipped;
+
+                log_enforcing_errno(r, "Failed to determine SELinux security context for %s: %m", path);
+                if (security_getenforce() > 0)
+                        return r;
+
+        } else {
+                if (setfscreatecon_raw(fcon) < 0) {
+                        r = -errno;
+
+                        log_enforcing_errno(r, "Failed to set SELinux security context %s for %s: %m", fcon, path);
+                        if (security_getenforce() > 0)
+                                return r;
+                } else
+                        context_changed = true;
+        }
+
+        r = bind(fd, addr, addrlen) < 0 ? -errno : 0;
+
+        /* Reset the creation context again, if we changed it above. r already carries bind()'s result. */
+        if (context_changed)
+                setfscreatecon_raw(NULL);
+
+        return r;
+
+skipped:
+#endif
+        if (bind(fd, addr, addrlen) < 0)
+                return -errno;
+
+        return 0;
+}
diff --git a/src/basic/selinux-util.h b/src/basic/selinux-util.h
new file mode 100644
index 0000000..bd5207c
--- /dev/null
+++ b/src/basic/selinux-util.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "label.h"
+
+bool mac_selinux_use(void);
+void mac_selinux_retest(void);
+
+int mac_selinux_init(void);
+void mac_selinux_finish(void);
+
+int mac_selinux_fix(const char *path, LabelFixFlags flags);
+int mac_selinux_apply(const char *path, const char *label);
+
+int mac_selinux_get_create_label_from_exe(const char *exe, char **label);
+int mac_selinux_get_our_label(char **label);
+int mac_selinux_get_child_mls_label(int socket_fd, const char *exe, const char *exec_label, char **label);
+char* mac_selinux_free(char *label);
+
+int mac_selinux_create_file_prepare(const char *path, mode_t mode);
+int mac_selinux_create_file_prepare_at(int dirfd, const char *path, mode_t mode);
+void mac_selinux_create_file_clear(void);
+
+int mac_selinux_create_socket_prepare(const char *label);
+void mac_selinux_create_socket_clear(void);
+
+int mac_selinux_bind(int fd, const struct sockaddr *addr, socklen_t addrlen);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, mac_selinux_free);
diff --git a/src/basic/set.h b/src/basic/set.h
new file mode 100644
index 0000000..2a80632
--- /dev/null
+++ b/src/basic/set.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "extract-word.h"
+#include "hashmap.h"
+#include "macro.h"
+
+Set *internal_set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+#define set_new(ops) internal_set_new(ops HASHMAP_DEBUG_SRC_ARGS)
+
+static inline Set *set_free(Set *s) {
+ return (Set*) internal_hashmap_free(HASHMAP_BASE(s), NULL, NULL);
+}
+
+static inline Set *set_free_free(Set *s) {
+ return (Set*) internal_hashmap_free(HASHMAP_BASE(s), free, NULL);
+}
+
+/* no set_free_free_free */
+
+static inline Set *set_copy(Set *s) {
+ return (Set*) internal_hashmap_copy(HASHMAP_BASE(s));
+}
+
+int internal_set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+#define set_ensure_allocated(h, ops) internal_set_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS)
+
+int set_put(Set *s, const void *key);
+/* no set_update */
+/* no set_replace */
+static inline void *set_get(Set *s, void *key) {
+ return internal_hashmap_get(HASHMAP_BASE(s), key);
+}
+/* no set_get2 */
+
+static inline bool set_contains(Set *s, const void *key) {
+ return internal_hashmap_contains(HASHMAP_BASE(s), key);
+}
+
+static inline void *set_remove(Set *s, const void *key) {
+ return internal_hashmap_remove(HASHMAP_BASE(s), key);
+}
+
+/* no set_remove2 */
+/* no set_remove_value */
+int set_remove_and_put(Set *s, const void *old_key, const void *new_key);
+/* no set_remove_and_replace */
+int set_merge(Set *s, Set *other);
+
+static inline int set_reserve(Set *h, unsigned entries_add) {
+ return internal_hashmap_reserve(HASHMAP_BASE(h), entries_add);
+}
+
+static inline int set_move(Set *s, Set *other) {
+ return internal_hashmap_move(HASHMAP_BASE(s), HASHMAP_BASE(other));
+}
+
+static inline int set_move_one(Set *s, Set *other, const void *key) {
+ return internal_hashmap_move_one(HASHMAP_BASE(s), HASHMAP_BASE(other), key);
+}
+
+static inline unsigned set_size(Set *s) {
+ return internal_hashmap_size(HASHMAP_BASE(s));
+}
+
+static inline bool set_isempty(Set *s) {
+ return set_size(s) == 0;
+}
+
+static inline unsigned set_buckets(Set *s) {
+ return internal_hashmap_buckets(HASHMAP_BASE(s));
+}
+
+bool set_iterate(Set *s, Iterator *i, void **value);
+
+static inline void set_clear(Set *s) {
+ internal_hashmap_clear(HASHMAP_BASE(s), NULL, NULL);
+}
+
+static inline void set_clear_free(Set *s) {
+ internal_hashmap_clear(HASHMAP_BASE(s), free, NULL);
+}
+
+/* no set_clear_free_free */
+
+static inline void *set_steal_first(Set *s) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(s), true, NULL);
+}
+
+#define set_clear_with_destructor(_s, _f) \
+ ({ \
+ void *_item; \
+ while ((_item = set_steal_first(_s))) \
+ _f(_item); \
+ })
+#define set_free_with_destructor(_s, _f) \
+ ({ \
+ set_clear_with_destructor(_s, _f); \
+ set_free(_s); \
+ })
+
+/* no set_steal_first_key */
+/* no set_first_key */
+
+static inline void *set_first(Set *s) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(s), false, NULL);
+}
+
+/* no set_next */
+
+static inline char **set_get_strv(Set *s) {
+ return internal_hashmap_get_strv(HASHMAP_BASE(s));
+}
+
+int set_consume(Set *s, void *value);
+int set_put_strdup(Set *s, const char *p);
+int set_put_strdupv(Set *s, char **l);
+int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags);
+
+#define SET_FOREACH(e, s, i) \
+ for ((i) = ITERATOR_FIRST; set_iterate((s), &(i), (void**)&(e)); )
+
+#define SET_FOREACH_MOVE(e, d, s) \
+ for (; ({ e = set_first(s); assert_se(!e || set_move_one(d, s, e) >= 0); e; }); )
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Set*, set_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Set*, set_free_free);
+
+#define _cleanup_set_free_ _cleanup_(set_freep)
+#define _cleanup_set_free_free_ _cleanup_(set_free_freep)
diff --git a/src/basic/sigbus.c b/src/basic/sigbus.c
new file mode 100644
index 0000000..d5254ea
--- /dev/null
+++ b/src/basic/sigbus.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stddef.h>
+#include <sys/mman.h>
+
+#include "macro.h"
+#include "sigbus.h"
+#include "util.h"
+
+#define SIGBUS_QUEUE_MAX 64
+
+static struct sigaction old_sigaction;
+static unsigned n_installed = 0;
+
+/* We maintain a fixed size list of page addresses that triggered a
+ SIGBUS. We access this list with atomic operations, so that we
+ don't have to deal with locks between signal handler and main
+ programs in possibly multiple threads. */
+
+static void* volatile sigbus_queue[SIGBUS_QUEUE_MAX];
+static volatile sig_atomic_t n_sigbus_queue = 0;
+
+static void sigbus_push(void *addr) {
+ unsigned u;
+
+ assert(addr);
+
+ /* Records 'addr' in sigbus_queue[]. Called from sigbus_handler().
+ *
+ * Find a free place, increase the number of entries and leave, if we can.
+ * A slot is free if it holds NULL; it is claimed with a compare-and-swap. */
+ for (u = 0; u < SIGBUS_QUEUE_MAX; u++)
+ if (__sync_bool_compare_and_swap(&sigbus_queue[u], NULL, addr)) {
+ __sync_fetch_and_add(&n_sigbus_queue, 1);
+ return;
+ }
+
+ /* If we can't, make sure the queue size is out of bounds, to
+ * mark it as overflow. This is done by adding SIGBUS_QUEUE_MAX to the
+ * counter, retrying until the compare-and-swap goes through or the
+ * counter is seen to be beyond SIGBUS_QUEUE_MAX already. */
+ for (;;) {
+ unsigned c;
+
+ __sync_synchronize();
+ c = n_sigbus_queue;
+
+ if (c > SIGBUS_QUEUE_MAX) /* already overflow */
+ return;
+
+ if (__sync_bool_compare_and_swap(&n_sigbus_queue, c, c + SIGBUS_QUEUE_MAX))
+ return;
+ }
+}
+
+int sigbus_pop(void **ret) {
+ assert(ret);
+ /* Takes one recorded address out of sigbus_queue[]. Returns 1 and stores the
+ * address in *ret if one was taken, 0 if the queue is empty, and -EOVERFLOW
+ * if the entry counter is SIGBUS_QUEUE_MAX or larger, i.e. the queue is full
+ * or was marked as overflowed by sigbus_push(). */
+ for (;;) {
+ unsigned u, c;
+
+ __sync_synchronize();
+ c = n_sigbus_queue;
+
+ if (_likely_(c == 0))
+ return 0;
+
+ if (_unlikely_(c >= SIGBUS_QUEUE_MAX))
+ return -EOVERFLOW;
+
+ for (u = 0; u < SIGBUS_QUEUE_MAX; u++) { /* look for a used slot and try to claim it */
+ void *addr;
+
+ addr = sigbus_queue[u];
+ if (!addr)
+ continue;
+
+ if (__sync_bool_compare_and_swap(&sigbus_queue[u], addr, NULL)) { /* fails if the slot changed since we read it */
+ __sync_fetch_and_sub(&n_sigbus_queue, 1);
+ *ret = addr;
+ return 1;
+ }
+ }
+ } /* counter was non-zero but no slot could be claimed: re-read the counter and retry */
+}
+
+static void sigbus_handler(int sn, siginfo_t *si, void *data) {
+ unsigned long ul;
+ void *aligned;
+ /* SIGBUS handler installed by sigbus_install(). For BUS_ADRERR faults with a
+ * known address it records the faulting page and maps an anonymous page over
+ * it. For everything else it restores the previous SIGBUS disposition and
+ * raises the signal again. */
+ assert(sn == SIGBUS);
+ assert(si);
+
+ if (si->si_code != BUS_ADRERR || !si->si_addr) {
+ assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0);
+ raise(SIGBUS);
+ return;
+ }
+
+ ul = (unsigned long) si->si_addr; /* round the faulting address down to a multiple of page_size() */
+ ul = ul / page_size();
+ ul = ul * page_size();
+ aligned = (void*) ul;
+
+ /* Let's remember which address failed */
+ sigbus_push(aligned);
+
+ /* Replace mapping with an anonymous page, so that the
+ * execution can continue, however with a zeroed out page */
+ assert_se(mmap(aligned, page_size(), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == aligned);
+}
+
+void sigbus_install(void) {
+        struct sigaction sa = {
+                .sa_sigaction = sigbus_handler,
+                .sa_flags = SA_SIGINFO,
+        };
+
+        /* Installs sigbus_handler() for SIGBUS. Calls are counted: only the first one actually changes
+         * the signal disposition, and the previous disposition is saved in old_sigaction. */
+
+        /* make sure that sysconf() is not called from a signal handler because
+         * it is not guaranteed to be async-signal-safe since POSIX.1-2008 */
+        (void) page_size();
+
+        if (++n_installed == 1)
+                assert_se(sigaction(SIGBUS, &sa, &old_sigaction) == 0);
+}
+
+void sigbus_reset(void) {
+
+        /* Undoes one sigbus_install() call. The SIGBUS disposition saved in old_sigaction is restored
+         * once the installation counter drops to zero. Does nothing if the counter is zero already. */
+
+        if (n_installed == 0)
+                return;
+
+        if (--n_installed == 0)
+                assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0);
+}
diff --git a/src/basic/sigbus.h b/src/basic/sigbus.h
new file mode 100644
index 0000000..459e19f
--- /dev/null
+++ b/src/basic/sigbus.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+void sigbus_install(void);
+void sigbus_reset(void);
+
+int sigbus_pop(void **ret);
diff --git a/src/basic/signal-util.c b/src/basic/signal-util.c
new file mode 100644
index 0000000..fb8a63f
--- /dev/null
+++ b/src/basic/signal-util.c
@@ -0,0 +1,290 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "macro.h"
+#include "parse-util.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+
+int reset_all_signal_handlers(void) {
+        static const struct sigaction sa = {
+                .sa_handler = SIG_DFL,
+                .sa_flags = SA_RESTART,
+        };
+        int sig, ret = 0;
+
+        /* Resets the handlers of all signals from 1 up to (but excluding) _NSIG to SIG_DFL. Keeps going
+         * on failure and returns the first error seen, as negative errno, or 0. */
+
+        for (sig = 1; sig < _NSIG; sig++) {
+
+                /* These two cannot be caught... */
+                if (sig == SIGKILL || sig == SIGSTOP)
+                        continue;
+
+                if (sigaction(sig, &sa, NULL) >= 0)
+                        continue;
+
+                /* On Linux the first two RT signals are reserved by
+                 * glibc, and sigaction() will return EINVAL for them. */
+                if (errno == EINVAL)
+                        continue;
+
+                if (ret >= 0)
+                        ret = -errno;
+        }
+
+        return ret;
+}
+
+int reset_signal_mask(void) {
+        sigset_t empty;
+
+        /* Replaces the signal mask by an empty one. Returns 0 on success, negative errno on failure. */
+
+        if (sigemptyset(&empty) < 0 ||
+            sigprocmask(SIG_SETMASK, &empty, NULL) < 0)
+                return -errno;
+
+        return 0;
+}
+
+static int sigaction_many_ap(const struct sigaction *sa, int sig, va_list ap) {
+        int ret = 0;
+
+        /* Applies 'sa' to 'sig' and then to every signal read from 'ap'. A negative signal ends the
+         * list, a 0 signal is skipped. All signals are processed even if one fails; the first error is
+         * returned as negative errno. */
+
+        for (; sig >= 0; sig = va_arg(ap, int)) {
+
+                if (sig == 0)
+                        continue;
+
+                if (sigaction(sig, sa, NULL) < 0 && ret >= 0)
+                        ret = -errno;
+        }
+
+        return ret;
+}
+
+int sigaction_many(const struct sigaction *sa, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, sa);
+ r = sigaction_many_ap(sa, 0, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int ignore_signals(int sig, ...) {
+
+ static const struct sigaction sa = {
+ .sa_handler = SIG_IGN,
+ .sa_flags = SA_RESTART,
+ };
+
+ va_list ap;
+ int r;
+
+ va_start(ap, sig);
+ r = sigaction_many_ap(&sa, sig, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int default_signals(int sig, ...) {
+
+ static const struct sigaction sa = {
+ .sa_handler = SIG_DFL,
+ .sa_flags = SA_RESTART,
+ };
+
+ va_list ap;
+ int r;
+
+ va_start(ap, sig);
+ r = sigaction_many_ap(&sa, sig, ap);
+ va_end(ap);
+
+ return r;
+}
+
+static int sigset_add_many_ap(sigset_t *ss, va_list ap) {
+        int ret = 0;
+
+        assert(ss);
+
+        /* Adds every signal read from 'ap' to 'ss'. A negative signal ends the list, a 0 signal is
+         * skipped. All signals are processed even if one fails; the first error is returned as negative
+         * errno. */
+
+        for (;;) {
+                int sig = va_arg(ap, int);
+
+                if (sig < 0)
+                        break;
+
+                if (sig == 0)
+                        continue;
+
+                if (sigaddset(ss, sig) < 0 && ret >= 0)
+                        ret = -errno;
+        }
+
+        return ret;
+}
+
+int sigset_add_many(sigset_t *ss, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, ss);
+ r = sigset_add_many_ap(ss, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int sigprocmask_many(int how, sigset_t *old, ...) {
+ va_list ap;
+ sigset_t ss;
+ int r;
+
+ if (sigemptyset(&ss) < 0)
+ return -errno;
+
+ va_start(ap, old);
+ r = sigset_add_many_ap(&ss, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ if (sigprocmask(how, &ss, old) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static const char *const __signal_table[] = {
+ [SIGHUP] = "HUP",
+ [SIGINT] = "INT",
+ [SIGQUIT] = "QUIT",
+ [SIGILL] = "ILL",
+ [SIGTRAP] = "TRAP",
+ [SIGABRT] = "ABRT",
+ [SIGBUS] = "BUS",
+ [SIGFPE] = "FPE",
+ [SIGKILL] = "KILL",
+ [SIGUSR1] = "USR1",
+ [SIGSEGV] = "SEGV",
+ [SIGUSR2] = "USR2",
+ [SIGPIPE] = "PIPE",
+ [SIGALRM] = "ALRM",
+ [SIGTERM] = "TERM",
+#ifdef SIGSTKFLT
+ [SIGSTKFLT] = "STKFLT", /* Linux on SPARC doesn't know SIGSTKFLT */
+#endif
+ [SIGCHLD] = "CHLD",
+ [SIGCONT] = "CONT",
+ [SIGSTOP] = "STOP",
+ [SIGTSTP] = "TSTP",
+ [SIGTTIN] = "TTIN",
+ [SIGTTOU] = "TTOU",
+ [SIGURG] = "URG",
+ [SIGXCPU] = "XCPU",
+ [SIGXFSZ] = "XFSZ",
+ [SIGVTALRM] = "VTALRM",
+ [SIGPROF] = "PROF",
+ [SIGWINCH] = "WINCH",
+ [SIGIO] = "IO",
+ [SIGPWR] = "PWR",
+ [SIGSYS] = "SYS"
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(__signal, int);
+
+const char *signal_to_string(int signo) {
+ static thread_local char buf[STRLEN("RTMIN+") + DECIMAL_STR_MAX(int) + 1];
+ const char *name;
+ /* Converts a signal number into a string. Signals listed in __signal_table are
+ * returned by their name without "SIG" prefix. Signals in the range
+ * SIGRTMIN…SIGRTMAX are formatted as "RTMIN+n", everything else as plain decimal
+ * number. In the latter two cases the result lives in a static per-thread buffer
+ * that is overwritten by the next such call in the same thread. */
+ name = __signal_to_string(signo);
+ if (name)
+ return name;
+
+ if (signo >= SIGRTMIN && signo <= SIGRTMAX)
+ xsprintf(buf, "RTMIN+%d", signo - SIGRTMIN);
+ else
+ xsprintf(buf, "%d", signo);
+
+ return buf;
+}
+
+int signal_from_string(const char *s) {
+ const char *p;
+ int signo, r;
+
+ /* Parses a signal specification and returns the signal number, or a negative
+ * errno-style error. Accepted are: a decimal signal number, a signal name from
+ * __signal_table with or without "SIG" prefix, "RTMIN", "RTMIN+n", "RTMAX" and
+ * "RTMAX-n" (the latter four also with "SIG" prefix).
+ *
+ * Check that the input is a signal number. */
+ if (safe_atoi(s, &signo) >= 0) {
+ if (SIGNAL_VALID(signo))
+ return signo;
+ else
+ return -ERANGE;
+ }
+
+ /* Drop "SIG" prefix. */
+ if (startswith(s, "SIG"))
+ s += 3;
+
+ /* Check that the input is a signal name. */
+ signo = __signal_from_string(s);
+ if (signo > 0)
+ return signo;
+
+ /* Check that the input is RTMIN or
+ * RTMIN+n (0 <= n <= SIGRTMAX-SIGRTMIN). The "+" is passed
+ * to safe_atoi() as part of the number. */
+ p = startswith(s, "RTMIN");
+ if (p) {
+ if (*p == '\0')
+ return SIGRTMIN;
+ if (*p != '+')
+ return -EINVAL;
+
+ r = safe_atoi(p, &signo);
+ if (r < 0)
+ return r;
+
+ if (signo < 0 || signo > SIGRTMAX - SIGRTMIN)
+ return -ERANGE;
+
+ return signo + SIGRTMIN;
+ }
+
+ /* Check that the input is RTMAX or
+ * RTMAX-n (0 <= n <= SIGRTMAX-SIGRTMIN). The "-" is passed
+ * to safe_atoi() as part of the number, hence signo is zero
+ * or negative here and is added to SIGRTMAX below. */
+ p = startswith(s, "RTMAX");
+ if (p) {
+ if (*p == '\0')
+ return SIGRTMAX;
+ if (*p != '-')
+ return -EINVAL;
+
+ r = safe_atoi(p, &signo);
+ if (r < 0)
+ return r;
+
+ if (signo > 0 || signo < SIGRTMIN - SIGRTMAX)
+ return -ERANGE;
+
+ return signo + SIGRTMAX;
+ }
+
+ return -EINVAL;
+}
+
+void nop_signal_handler(int sig) {
+ /* nothing here */
+}
diff --git a/src/basic/signal-util.h b/src/basic/signal-util.h
new file mode 100644
index 0000000..92f2804
--- /dev/null
+++ b/src/basic/signal-util.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <signal.h>
+
+#include "macro.h"
+
+/* NOTE(review): the implementations of the following helpers are not part of this header; their exact
+ * contracts should be confirmed against signal-util.c. */
+int reset_all_signal_handlers(void);
+int reset_signal_mask(void);
+
+int ignore_signals(int sig, ...);
+int default_signals(int sig, ...);
+int sigaction_many(const struct sigaction *sa, ...);
+
+int sigset_add_many(sigset_t *ss, ...);
+/* BLOCK_SIGNALS() below calls this with a signal list terminated by -1. */
+int sigprocmask_many(int how, sigset_t *old, ...);
+
+/* Conversion between signal numbers and names (names carry no "SIG" prefix on output). */
+const char *signal_to_string(int i) _const_;
+int signal_from_string(const char *s) _pure_;
+
+/* A signal handler that does nothing. */
+void nop_signal_handler(int sig);
+
+/* Installs *ss as the signal mask via sigprocmask(SIG_SETMASK); failure trips assert_se(). Used as the
+ * cleanup handler of BLOCK_SIGNALS(). */
+static inline void block_signals_reset(sigset_t *ss) {
+ assert_se(sigprocmask(SIG_SETMASK, ss, NULL) >= 0);
+}
+
+/* Declares a local variable _saved_sigset that receives the previous signal mask from
+ * sigprocmask_many(SIG_BLOCK, ...) applied to the listed signals (the macro appends the -1 terminator).
+ * The variable is tagged with the block_signals_reset() cleanup handler, so the saved mask is
+ * reinstalled when the variable goes out of scope. Because the variable name is fixed, the macro can be
+ * used only once per scope. */
+#define BLOCK_SIGNALS(...) \
+ _cleanup_(block_signals_reset) _unused_ sigset_t _saved_sigset = ({ \
+ sigset_t _t; \
+ assert_se(sigprocmask_many(SIG_BLOCK, &_t, __VA_ARGS__, -1) >= 0); \
+ _t; \
+ })
+
+/* Returns true if signo lies in the range 1 … _NSIG-1. */
+static inline bool SIGNAL_VALID(int signo) {
+        if (signo <= 0)
+                return false;
+
+        return signo < _NSIG;
+}
+
+/* Like signal_to_string(), but returns NULL for numbers that fail SIGNAL_VALID(). */
+static inline const char* signal_to_string_with_check(int n) {
+        return SIGNAL_VALID(n) ? signal_to_string(n) : NULL;
+}
diff --git a/src/basic/siphash24.c b/src/basic/siphash24.c
new file mode 100644
index 0000000..6118081
--- /dev/null
+++ b/src/basic/siphash24.c
@@ -0,0 +1,200 @@
+/*
+ SipHash reference C implementation
+
+ Written in 2012 by
+ Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
+ Daniel J. Bernstein <djb@cr.yp.to>
+
+ To the extent possible under law, the author(s) have dedicated all copyright
+ and related and neighboring rights to this software to the public domain
+ worldwide. This software is distributed without any warranty.
+
+ You should have received a copy of the CC0 Public Domain Dedication along with
+ this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+ (Minimal changes made by Lennart Poettering, to make clean for inclusion in systemd)
+ (Refactored by Tom Gundersen to split up in several functions and follow systemd
+ coding style)
+*/
+
+#include <stdio.h>
+
+#include "macro.h"
+#include "siphash24.h"
+#include "unaligned.h"
+
+/* Rotates the 64-bit value x left by b bits (0 <= b < 64). The right-shift count is masked so that
+ * b == 0 yields a shift by 0 rather than an undefined shift by the full width of the type. */
+static uint64_t rotate_left(uint64_t x, uint8_t b) {
+        assert(b < 64);
+
+        return (x << b) | (x >> ((64 - b) & 63));
+}
+
+/* Performs one mixing round over the four state words v0..v3 using additions, XORs and rotations by
+ * 13, 32, 16, 21, 17 and 32 bits. The statements depend on each other's results, so their order must not
+ * be changed. */
+static void sipround(struct siphash *state) {
+ assert(state);
+
+ /* First half: mix the (v0, v1) and (v2, v3) pairs. */
+ state->v0 += state->v1;
+ state->v1 = rotate_left(state->v1, 13);
+ state->v1 ^= state->v0;
+ state->v0 = rotate_left(state->v0, 32);
+ state->v2 += state->v3;
+ state->v3 = rotate_left(state->v3, 16);
+ state->v3 ^= state->v2;
+ /* Second half: mix across the pairs, (v0, v3) and (v2, v1). */
+ state->v0 += state->v3;
+ state->v3 = rotate_left(state->v3, 21);
+ state->v3 ^= state->v0;
+ state->v2 += state->v1;
+ state->v1 = rotate_left(state->v1, 17);
+ state->v1 ^= state->v2;
+ state->v2 = rotate_left(state->v2, 32);
+}
+
+/* Initializes *state from the 16-byte key k: the key is read as two little-endian 64-bit halves which
+ * are XORed into the four fixed initialization constants; the pending-byte buffer and the byte counter
+ * start out at zero. */
+void siphash24_init(struct siphash *state, const uint8_t k[static 16]) {
+        uint64_t lo, hi;
+
+        assert(state);
+        assert(k);
+
+        lo = unaligned_read_le64(k);
+        hi = unaligned_read_le64(k + 8);
+
+        /* "somepseudorandomlygeneratedbytes" */
+        state->v0 = 0x736f6d6570736575ULL ^ lo;
+        state->v1 = 0x646f72616e646f6dULL ^ hi;
+        state->v2 = 0x6c7967656e657261ULL ^ lo;
+        state->v3 = 0x7465646279746573ULL ^ hi;
+        state->padding = 0;
+        state->inlen = 0;
+}
+
+/* Feeds inlen bytes starting at _in into the running hash state. Input is consumed in 8-byte
+ * little-endian words; bytes that do not yet complete a word are collected in state->padding and are
+ * picked up by the next call or by siphash24_finalize(). state->inlen counts all bytes seen so far. */
+void siphash24_compress(const void *_in, size_t inlen, struct siphash *state) {
+
+        const uint8_t *in = _in;
+        const uint8_t *end;
+        size_t left;
+        uint64_t m;
+
+        assert(in);
+        assert(state);
+
+        /* Only look into *state after it has been checked above. */
+        end = in + inlen;
+        left = state->inlen & 7;
+
+        /* Update total length */
+        state->inlen += inlen;
+
+        /* If padding exists, fill it out */
+        if (left > 0) {
+                for ( ; in < end && left < 8; in ++, left ++)
+                        state->padding |= ((uint64_t) *in) << (left * 8);
+
+                if (in == end && left < 8)
+                        /* We did not have enough input to fill out the padding completely */
+                        return;
+
+#if ENABLE_DEBUG_SIPHASH
+                printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0);
+                printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1);
+                printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2);
+                printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3);
+                printf("(%3zu) compress padding %08x %08x\n", state->inlen, (uint32_t) (state->padding >> 32), (uint32_t)state->padding);
+#endif
+
+                /* The padding word is complete now: compress it like any other input word. */
+                state->v3 ^= state->padding;
+                sipround(state);
+                sipround(state);
+                state->v0 ^= state->padding;
+
+                state->padding = 0;
+        }
+
+        /* Leave the trailing partial word (if any) for the switch below. */
+        end -= (state->inlen % sizeof(uint64_t));
+
+        for ( ; in < end; in += 8) {
+                m = unaligned_read_le64(in);
+#if ENABLE_DEBUG_SIPHASH
+                printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0);
+                printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1);
+                printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2);
+                printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3);
+                printf("(%3zu) compress %08x %08x\n", state->inlen, (uint32_t) (m >> 32), (uint32_t) m);
+#endif
+                state->v3 ^= m;
+                sipround(state);
+                sipround(state);
+                state->v0 ^= m;
+        }
+
+        /* Stash the remaining 0..7 bytes, lowest byte first, for the next call. */
+        left = state->inlen & 7;
+        switch (left) {
+        case 7:
+                state->padding |= ((uint64_t) in[6]) << 48;
+                _fallthrough_;
+        case 6:
+                state->padding |= ((uint64_t) in[5]) << 40;
+                _fallthrough_;
+        case 5:
+                state->padding |= ((uint64_t) in[4]) << 32;
+                _fallthrough_;
+        case 4:
+                state->padding |= ((uint64_t) in[3]) << 24;
+                _fallthrough_;
+        case 3:
+                state->padding |= ((uint64_t) in[2]) << 16;
+                _fallthrough_;
+        case 2:
+                state->padding |= ((uint64_t) in[1]) << 8;
+                _fallthrough_;
+        case 1:
+                state->padding |= ((uint64_t) in[0]);
+                _fallthrough_;
+        case 0:
+                break;
+        }
+}
+
+/* Completes the hash: the pending bytes are combined with the total length (shifted into the top byte)
+ * to form the last input word, which is compressed with two rounds; v2 is then XORed with 0xff, four
+ * more rounds are run, and the XOR of the four state words is returned. The state is modified. */
+uint64_t siphash24_finalize(struct siphash *state) {
+        uint64_t b;
+        unsigned i;
+
+        assert(state);
+
+        b = state->padding | (((uint64_t) state->inlen) << 56);
+
+#if ENABLE_DEBUG_SIPHASH
+        printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0);
+        printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1);
+        printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2);
+        printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3);
+        printf("(%3zu) padding %08x %08x\n", state->inlen, (uint32_t) (state->padding >> 32), (uint32_t) state->padding);
+#endif
+
+        /* Two compression rounds for the last word */
+        state->v3 ^= b;
+        for (i = 0; i < 2; i++)
+                sipround(state);
+        state->v0 ^= b;
+
+#if ENABLE_DEBUG_SIPHASH
+        printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0);
+        printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1);
+        printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2);
+        printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3);
+#endif
+        state->v2 ^= 0xff;
+
+        /* Four finalization rounds */
+        for (i = 0; i < 4; i++)
+                sipround(state);
+
+        return state->v0 ^ state->v1 ^ state->v2 ^ state->v3;
+}
+
+/* One-shot convenience wrapper: hashes the inlen bytes at in with the 16-byte key k by running
+ * init, compress and finalize on a temporary state. */
+uint64_t siphash24(const void *in, size_t inlen, const uint8_t k[static 16]) {
+        struct siphash scratch;
+        uint64_t digest;
+
+        assert(in);
+        assert(k);
+
+        siphash24_init(&scratch, k);
+        siphash24_compress(in, inlen, &scratch);
+        digest = siphash24_finalize(&scratch);
+
+        return digest;
+}
diff --git a/src/basic/siphash24.h b/src/basic/siphash24.h
new file mode 100644
index 0000000..67c4f75
--- /dev/null
+++ b/src/basic/siphash24.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/types.h>
+
+/* Running state of an incremental hash computation, set up by siphash24_init(). */
+struct siphash {
+ /* The four internal state words, seeded from the key and mixed on every input word. */
+ uint64_t v0;
+ uint64_t v1;
+ uint64_t v2;
+ uint64_t v3;
+ /* Input bytes received so far that do not yet make up a full 8-byte word, lowest byte first. */
+ uint64_t padding;
+ /* Total number of input bytes passed to siphash24_compress() so far. */
+ size_t inlen;
+};
+
+/* Incremental interface: initialize the state with a 16-byte key, feed data in any number of
+ * siphash24_compress() calls, then obtain the 64-bit result with siphash24_finalize(). */
+void siphash24_init(struct siphash *state, const uint8_t k[static 16]);
+void siphash24_compress(const void *in, size_t inlen, struct siphash *state);
+/* Feeds a single byte, passed via a one-element compound literal array. */
+#define siphash24_compress_byte(byte, state) siphash24_compress((const uint8_t[]) { (byte) }, 1, (state))
+
+uint64_t siphash24_finalize(struct siphash *state);
+
+/* One-shot interface: hash inlen bytes at in with key k. */
+uint64_t siphash24(const void *in, size_t inlen, const uint8_t k[static 16]);
+
+/* Hashes the NUL-terminated string s with key k; the terminating NUL byte is part of the hashed data. */
+static inline uint64_t siphash24_string(const char *s, const uint8_t k[static 16]) {
+        size_t size_with_nul = strlen(s) + 1;
+
+        return siphash24(s, size_with_nul, k);
+}
diff --git a/src/basic/smack-util.c b/src/basic/smack-util.c
new file mode 100644
index 0000000..123d00e
--- /dev/null
+++ b/src/basic/smack-util.c
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2013 Intel Corporation
+
+ Author: Auke Kok <auke-jan.h.kok@intel.com>
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "smack-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "xattr-util.h"
+
+#if ENABLE_SMACK
+/* Reports whether /sys/fs/smackfs/ is accessible. The answer is determined on the first call and
+ * cached in a static variable for all later calls. */
+bool mac_smack_use(void) {
+        static int cached_use = -1;
+
+        if (cached_use >= 0)
+                return cached_use;
+
+        cached_use = access("/sys/fs/smackfs/", F_OK) >= 0;
+        return cached_use;
+}
+
+/* Extended attribute name for each SmackAttr value. */
+static const char* const smack_attr_table[_SMACK_ATTR_MAX] = {
+ [SMACK_ATTR_ACCESS] = "security.SMACK64",
+ [SMACK_ATTR_EXEC] = "security.SMACK64EXEC",
+ [SMACK_ATTR_MMAP] = "security.SMACK64MMAP",
+ [SMACK_ATTR_TRANSMUTE] = "security.SMACK64TRANSMUTE",
+ [SMACK_ATTR_IPIN] = "security.SMACK64IPIN",
+ [SMACK_ATTR_IPOUT] = "security.SMACK64IPOUT",
+};
+
+/* Generates the lookup functions over the table above; this file uses smack_attr_to_string(). */
+DEFINE_STRING_TABLE_LOOKUP(smack_attr, SmackAttr);
+
+/* Reads the extended attribute for attr from path into a newly allocated *label via
+ * getxattr_malloc(). Returns 0 without touching *label when SMACK is not in use. */
+int mac_smack_read(const char *path, SmackAttr attr, char **label) {
+        assert(path);
+        assert(attr >= 0 && attr < _SMACK_ATTR_MAX);
+        assert(label);
+
+        return mac_smack_use()
+                ? getxattr_malloc(path, smack_attr_to_string(attr), label, true)
+                : 0;
+}
+
+/* Same as mac_smack_read(), but operates on an open file descriptor via fgetxattr_malloc(). */
+int mac_smack_read_fd(int fd, SmackAttr attr, char **label) {
+        assert(fd >= 0);
+        assert(attr >= 0 && attr < _SMACK_ATTR_MAX);
+        assert(label);
+
+        return mac_smack_use()
+                ? fgetxattr_malloc(fd, smack_attr_to_string(attr), label)
+                : 0;
+}
+
+/* Sets the extended attribute for attr on path to label, or removes the attribute when label is NULL.
+ * Symlinks are not followed (l*xattr() calls). Returns 0 on success or when SMACK is not in use,
+ * -errno on failure. */
+int mac_smack_apply(const char *path, SmackAttr attr, const char *label) {
+        int r;
+
+        assert(path);
+        assert(attr >= 0 && attr < _SMACK_ATTR_MAX);
+
+        if (!mac_smack_use())
+                return 0;
+
+        r = label
+                ? lsetxattr(path, smack_attr_to_string(attr), label, strlen(label), 0)
+                : lremovexattr(path, smack_attr_to_string(attr));
+
+        return r < 0 ? -errno : 0;
+}
+
+/* Same as mac_smack_apply(), but operates on an open file descriptor (f*xattr() calls). */
+int mac_smack_apply_fd(int fd, SmackAttr attr, const char *label) {
+        int r;
+
+        assert(fd >= 0);
+        assert(attr >= 0 && attr < _SMACK_ATTR_MAX);
+
+        if (!mac_smack_use())
+                return 0;
+
+        r = label
+                ? fsetxattr(fd, smack_attr_to_string(attr), label, strlen(label), 0)
+                : fremovexattr(fd, smack_attr_to_string(attr));
+
+        return r < 0 ? -errno : 0;
+}
+
+/* Writes label to the "attr/current" file of process pid in procfs. Returns 0 when SMACK is not in
+ * use, otherwise whatever write_string_file() returns. */
+int mac_smack_apply_pid(pid_t pid, const char *label) {
+        const char *p;
+
+        assert(label);
+
+        if (!mac_smack_use())
+                return 0;
+
+        p = procfs_file_alloca(pid, "attr/current");
+
+        return write_string_file(p, label, WRITE_STRING_FILE_DISABLE_BUFFER);
+}
+
+/* Applies the default SMACK access label to the inode referenced by fd, which the caller opened for
+ * abspath. Only paths below /dev are touched: directories and character devices get "*", symlinks get
+ * "_", everything else is left alone. The attribute is set through /proc/self/fd/. Failures are
+ * suppressed when the file system does not support labels, when it is read-only and
+ * LABEL_IGNORE_EROFS is set, or when the existing label already matches. */
+static int smack_fix_fd(int fd, const char *abspath, LabelFixFlags flags) {
+        char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+        _cleanup_free_ char *old_label = NULL;
+        const char *label;
+        struct stat st;
+        int r;
+
+        /* The caller should have done the sanity checks. */
+        assert(abspath);
+        assert(path_is_absolute(abspath));
+
+        /* Path must be in /dev. */
+        if (!path_startswith(abspath, "/dev"))
+                return 0;
+
+        if (fstat(fd, &st) < 0)
+                return -errno;
+
+        /* Directories and character devices: "*". Symlinks: "_". Anything else: unchanged. */
+        if (S_ISDIR(st.st_mode) || S_ISCHR(st.st_mode))
+                label = SMACK_STAR_LABEL;
+        else if (S_ISLNK(st.st_mode))
+                label = SMACK_FLOOR_LABEL;
+        else
+                return 0;
+
+        xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+        if (setxattr(procfs_path, "security.SMACK64", label, strlen(label), 0) >= 0)
+                return 0;
+
+        r = -errno;
+
+        /* If the FS doesn't support labels, then exit without warning */
+        if (r == -EOPNOTSUPP)
+                return 0;
+
+        /* If the FS is read-only and we were told to ignore failures caused by that, suppress error */
+        if (r == -EROFS && (flags & LABEL_IGNORE_EROFS))
+                return 0;
+
+        /* If the old label is identical to the new one, suppress any kind of error */
+        if (getxattr_malloc(procfs_path, "security.SMACK64", &old_label, false) >= 0 &&
+            streq(old_label, label))
+                return 0;
+
+        return log_debug_errno(r, "Unable to fix SMACK label of %s: %m", abspath);
+}
+
+/* Opens path relative to dirfd (O_PATH, without following a final symlink) and fixes its SMACK label
+ * via smack_fix_fd(). A relative path is resolved to the path reported by fd_get_path() first. Returns
+ * 0 when SMACK is not in use, or when the file is missing and LABEL_IGNORE_ENOENT is set. */
+int mac_smack_fix_at(int dirfd, const char *path, LabelFixFlags flags) {
+        _cleanup_free_ char *resolved = NULL;
+        _cleanup_close_ int fd = -1;
+        int r;
+
+        assert(path);
+
+        if (!mac_smack_use())
+                return 0;
+
+        fd = openat(dirfd, path, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+        if (fd < 0)
+                return ((flags & LABEL_IGNORE_ENOENT) && errno == ENOENT) ? 0 : -errno;
+
+        if (path_is_absolute(path))
+                return smack_fix_fd(fd, path, flags);
+
+        r = fd_get_path(fd, &resolved);
+        if (r < 0)
+                return r;
+
+        return smack_fix_fd(fd, resolved, flags);
+}
+
+/* Makes path absolute relative to the current working directory, opens it (O_PATH, without following
+ * a final symlink) and fixes its SMACK label via smack_fix_fd(). Returns 0 when SMACK is not in use,
+ * or when the file is missing and LABEL_IGNORE_ENOENT is set. */
+int mac_smack_fix(const char *path, LabelFixFlags flags) {
+        _cleanup_free_ char *absolute = NULL;
+        _cleanup_close_ int fd = -1;
+        int r;
+
+        assert(path);
+
+        if (!mac_smack_use())
+                return 0;
+
+        r = path_make_absolute_cwd(path, &absolute);
+        if (r < 0)
+                return r;
+
+        fd = open(absolute, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+        if (fd < 0)
+                return ((flags & LABEL_IGNORE_ENOENT) && errno == ENOENT) ? 0 : -errno;
+
+        return smack_fix_fd(fd, absolute, flags);
+}
+
+/* Copies the SMACK access label of src to dest: reads it with mac_smack_read() and applies it with
+ * mac_smack_apply(). Returns the first negative result, otherwise the result of the apply step. */
+int mac_smack_copy(const char *dest, const char *src) {
+        _cleanup_free_ char *label = NULL;
+        int r;
+
+        assert(dest);
+        assert(src);
+
+        r = mac_smack_read(src, SMACK_ATTR_ACCESS, &label);
+        if (r < 0)
+                return r;
+
+        return mac_smack_apply(dest, SMACK_ATTR_ACCESS, label);
+}
+
+#else
+/* Stub implementations used when built without ENABLE_SMACK: SMACK is reported as unused, reading a
+ * label fails with -EOPNOTSUPP, and all label-changing operations succeed without doing anything. */
+bool mac_smack_use(void) {
+ return false;
+}
+
+int mac_smack_read(const char *path, SmackAttr attr, char **label) {
+ return -EOPNOTSUPP;
+}
+
+int mac_smack_read_fd(int fd, SmackAttr attr, char **label) {
+ return -EOPNOTSUPP;
+}
+
+int mac_smack_apply(const char *path, SmackAttr attr, const char *label) {
+ return 0;
+}
+
+int mac_smack_apply_fd(int fd, SmackAttr attr, const char *label) {
+ return 0;
+}
+
+int mac_smack_apply_pid(pid_t pid, const char *label) {
+ return 0;
+}
+
+int mac_smack_fix(const char *path, LabelFixFlags flags) {
+ return 0;
+}
+
+int mac_smack_fix_at(int dirfd, const char *path, LabelFixFlags flags) {
+ return 0;
+}
+
+int mac_smack_copy(const char *dest, const char *src) {
+ return 0;
+}
+#endif
diff --git a/src/basic/smack-util.h b/src/basic/smack-util.h
new file mode 100644
index 0000000..395ec07
--- /dev/null
+++ b/src/basic/smack-util.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2013 Intel Corporation
+
+ Author: Auke Kok <auke-jan.h.kok@intel.com>
+***/
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "label.h"
+#include "macro.h"
+
+/* Label strings applied by the label-fixing code: "_" for symlinks, "*" for directories and character
+ * devices below /dev. */
+#define SMACK_FLOOR_LABEL "_"
+#define SMACK_STAR_LABEL "*"
+
+/* Selects one of the SMACK extended attributes; each value maps to a "security.SMACK64…" attribute
+ * name through smack_attr_to_string(). */
+typedef enum SmackAttr {
+ SMACK_ATTR_ACCESS,
+ SMACK_ATTR_EXEC,
+ SMACK_ATTR_MMAP,
+ SMACK_ATTR_TRANSMUTE,
+ SMACK_ATTR_IPIN,
+ SMACK_ATTR_IPOUT,
+ /* Number of attributes; used as the size of the name table. */
+ _SMACK_ATTR_MAX,
+ _SMACK_ATTR_INVALID = -1,
+} SmackAttr;
+
+/* Whether SMACK is in use on this system (always false when built without SMACK support). */
+bool mac_smack_use(void);
+
+/* Apply the default label to a file below /dev; files elsewhere are left untouched. */
+int mac_smack_fix(const char *path, LabelFixFlags flags);
+int mac_smack_fix_at(int dirfd, const char *path, LabelFixFlags flags);
+
+const char* smack_attr_to_string(SmackAttr i) _const_;
+SmackAttr smack_attr_from_string(const char *s) _pure_;
+/* Read a label into a newly allocated string, by path or by file descriptor. */
+int mac_smack_read(const char *path, SmackAttr attr, char **label);
+int mac_smack_read_fd(int fd, SmackAttr attr, char **label);
+/* Set a label; the path and fd variants remove the attribute when label is NULL. */
+int mac_smack_apply(const char *path, SmackAttr attr, const char *label);
+int mac_smack_apply_fd(int fd, SmackAttr attr, const char *label);
+int mac_smack_apply_pid(pid_t pid, const char *label);
+/* Copy the access label from src to dest. */
+int mac_smack_copy(const char *dest, const char *src);
diff --git a/src/basic/socket-label.c b/src/basic/socket-label.c
new file mode 100644
index 0000000..4ed19cd
--- /dev/null
+++ b/src/basic/socket-label.c
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "selinux-util.h"
+#include "socket-util.h"
+#include "umask-util.h"
+
+/* Creates a socket for address a, configures it, binds it and, for connection-oriented types, puts it
+ * into listening mode. Returns the new file descriptor on success (ownership passes to the caller) or
+ * a negative errno-style value; on failure the descriptor is closed automatically.
+ *
+ * flags is ORed into the socket type passed to socket(). only selects the IPV6_V6ONLY setting for
+ * AF_INET6. bind_to_device, reuse_port, free_bind and transparent are applied to AF_INET/AF_INET6
+ * sockets only. directory_mode is used for missing parent directories of a file system socket,
+ * socket_mode for the socket node itself. label, if non-NULL, is handed to the SELinux
+ * socket-creation hooks around socket(). */
+int socket_address_listen(
+ const SocketAddress *a,
+ int flags,
+ int backlog,
+ SocketAddressBindIPv6Only only,
+ const char *bind_to_device,
+ bool reuse_port,
+ bool free_bind,
+ bool transparent,
+ mode_t directory_mode,
+ mode_t socket_mode,
+ const char *label) {
+
+ _cleanup_close_ int fd = -1;
+ const char *p;
+ int r;
+
+ assert(a);
+
+ /* Strict verification, since this is a socket we create ourselves. */
+ r = socket_address_verify(a, true);
+ if (r < 0)
+ return r;
+
+ if (socket_address_family(a) == AF_INET6 && !socket_ipv6_is_supported())
+ return -EAFNOSUPPORT;
+
+ if (label) {
+ r = mac_selinux_create_socket_prepare(label);
+ if (r < 0)
+ return r;
+ }
+
+ /* Remember the socket() result in r first, so that the SELinux state is cleared on both the
+ * success and the failure path before we possibly return. */
+ fd = socket(socket_address_family(a), a->type | flags, a->protocol);
+ r = fd < 0 ? -errno : 0;
+
+ if (label)
+ mac_selinux_create_socket_clear();
+
+ if (r < 0)
+ return r;
+
+ if (socket_address_family(a) == AF_INET6 && only != SOCKET_ADDRESS_DEFAULT) {
+ r = setsockopt_int(fd, IPPROTO_IPV6, IPV6_V6ONLY, only == SOCKET_ADDRESS_IPV6_ONLY);
+ if (r < 0)
+ return r;
+ }
+
+ if (IN_SET(socket_address_family(a), AF_INET, AF_INET6)) {
+ /* Failing to bind to the requested device is fatal ... */
+ if (bind_to_device)
+ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, bind_to_device, strlen(bind_to_device)+1) < 0)
+ return -errno;
+
+ /* ... while failures of the following three options are only logged as warnings. */
+ if (reuse_port) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEPORT, true);
+ if (r < 0)
+ log_warning_errno(r, "SO_REUSEPORT failed: %m");
+ }
+
+ if (free_bind) {
+ r = setsockopt_int(fd, IPPROTO_IP, IP_FREEBIND, true);
+ if (r < 0)
+ log_warning_errno(r, "IP_FREEBIND failed: %m");
+ }
+
+ if (transparent) {
+ r = setsockopt_int(fd, IPPROTO_IP, IP_TRANSPARENT, true);
+ if (r < 0)
+ log_warning_errno(r, "IP_TRANSPARENT failed: %m");
+ }
+ }
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return r;
+
+ /* p is non-NULL only for AF_UNIX sockets that live in the file system. */
+ p = socket_address_get_path(a);
+ if (p) {
+ /* Create parents */
+ (void) mkdir_parents_label(p, directory_mode);
+
+ /* Enforce the right access mode for the socket */
+ RUN_WITH_UMASK(~socket_mode) {
+ r = mac_selinux_bind(fd, &a->sockaddr.sa, a->size);
+ if (r == -EADDRINUSE) {
+ /* Unlink and try again */
+
+ if (unlink(p) < 0)
+ return r; /* didn't work, return original error */
+
+ r = mac_selinux_bind(fd, &a->sockaddr.sa, a->size);
+ }
+ if (r < 0)
+ return r;
+ }
+ } else {
+ if (bind(fd, &a->sockaddr.sa, a->size) < 0)
+ return -errno;
+ }
+
+ if (socket_address_can_accept(a))
+ if (listen(fd, backlog) < 0)
+ return -errno;
+
+ /* Let's trigger an inotify event on the socket node, so that anyone waiting for this socket to be connectable
+ * gets notified */
+ if (p)
+ (void) touch(p);
+
+ /* Disarm the automatic close and hand the descriptor to the caller. */
+ r = fd;
+ fd = -1;
+
+ return r;
+}
+
+/* Parses address, creates a listening socket of the given type for it (flags are ORed into the socket
+ * type) and returns the file descriptor, or a negative errno-style value. A listen failure is always
+ * logged as an error; success is logged at log_level if the current maximum log level permits it.
+ *
+ * The descriptor is held in an auto-closing variable until the very end, so that it is not leaked if
+ * formatting the address for the log message fails after the socket was opened successfully. */
+int make_socket_fd(int log_level, const char* address, int type, int flags) {
+        _cleanup_close_ int fd = -1;
+        SocketAddress a;
+        int r;
+
+        r = socket_address_parse(&a, address);
+        if (r < 0)
+                return log_error_errno(r, "Failed to parse socket address \"%s\": %m", address);
+
+        a.type = type;
+
+        fd = socket_address_listen(&a, type | flags, SOMAXCONN, SOCKET_ADDRESS_DEFAULT,
+                                   NULL, false, false, false, 0755, 0644, NULL);
+        if (fd < 0 || log_get_max_level() >= log_level) {
+                _cleanup_free_ char *p = NULL;
+
+                r = socket_address_print(&a, &p);
+                if (r < 0)
+                        return log_error_errno(r, "socket_address_print(): %m");
+
+                if (fd < 0)
+                        log_error_errno(fd, "Failed to listen on %s: %m", p);
+                else
+                        log_full(log_level, "Listening on %s", p);
+        }
+
+        /* Pass the descriptor (or the negative error code) on and disarm the automatic close. */
+        r = fd;
+        fd = -1;
+
+        return r;
+}
diff --git a/src/basic/socket-util.c b/src/basic/socket-util.c
new file mode 100644
index 0000000..91bf801
--- /dev/null
+++ b/src/basic/socket-util.c
@@ -0,0 +1,1347 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <limits.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <poll.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "utf8.h"
+#include "util.h"
+
+#if ENABLE_IDN
+# define IDN_FLAGS NI_IDN
+#else
+# define IDN_FLAGS 0
+#endif
+
+/* Display names for the SOCK_* socket types, indexed by the type constant. */
+static const char* const socket_address_type_table[] = {
+ [SOCK_STREAM] = "Stream",
+ [SOCK_DGRAM] = "Datagram",
+ [SOCK_RAW] = "Raw",
+ [SOCK_RDM] = "ReliableDatagram",
+ [SOCK_SEQPACKET] = "SequentialPacket",
+ [SOCK_DCCP] = "DatagramCongestionControl",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_address_type, int);
+
+/* Parses the textual socket address s into *a. Supported notations:
+ *
+ *   [x:…:z]:port     IPv6 address and port
+ *   /path            AF_UNIX socket in the file system
+ *   @name            AF_UNIX socket in the abstract namespace
+ *   vsock:cid:port   AF_VSOCK (an empty cid means VMADDR_CID_ANY)
+ *   w.x.y.z:port     IPv4 address and port
+ *   ifname:port      IPv6 wildcard address scoped to the named interface
+ *   port             wildcard address, IPv6 if supported, IPv4 otherwise
+ *
+ * *a is reset first and its type is preset to SOCK_STREAM. Returns 0 on success, a negative
+ * errno-style value otherwise; on failure *a may be partially filled in. */
+int socket_address_parse(SocketAddress *a, const char *s) {
+ _cleanup_free_ char *n = NULL;
+ char *e;
+ int r;
+
+ assert(a);
+ assert(s);
+
+ /* All fields not named here start out zeroed; the AF_UNIX branches below rely on that for the
+ * NUL bytes they do not copy explicitly. */
+ *a = (SocketAddress) {
+ .type = SOCK_STREAM,
+ };
+
+ if (*s == '[') {
+ uint16_t port;
+
+ /* IPv6 in [x:.....:z]:p notation */
+
+ e = strchr(s+1, ']');
+ if (!e)
+ return -EINVAL;
+
+ n = strndup(s+1, e-s-1);
+ if (!n)
+ return -ENOMEM;
+
+ errno = 0;
+ if (inet_pton(AF_INET6, n, &a->sockaddr.in6.sin6_addr) <= 0)
+ return errno > 0 ? -errno : -EINVAL;
+
+ /* The closing bracket must be followed by ":port". */
+ e++;
+ if (*e != ':')
+ return -EINVAL;
+
+ e++;
+ r = parse_ip_port(e, &port);
+ if (r < 0)
+ return r;
+
+ a->sockaddr.in6.sin6_family = AF_INET6;
+ a->sockaddr.in6.sin6_port = htobe16(port);
+ a->size = sizeof(struct sockaddr_in6);
+
+ } else if (*s == '/') {
+ /* AF_UNIX socket */
+
+ size_t l;
+
+ l = strlen(s);
+ if (l >= sizeof(a->sockaddr.un.sun_path)) /* Note that we refuse non-NUL-terminated sockets when
+ * parsing (the kernel itself is less strict here in what it
+ * accepts) */
+ return -EINVAL;
+
+ a->sockaddr.un.sun_family = AF_UNIX;
+ memcpy(a->sockaddr.un.sun_path, s, l);
+ /* The size includes the trailing NUL byte. */
+ a->size = offsetof(struct sockaddr_un, sun_path) + l + 1;
+
+ } else if (*s == '@') {
+ /* Abstract AF_UNIX socket */
+ size_t l;
+
+ l = strlen(s+1);
+ if (l >= sizeof(a->sockaddr.un.sun_path) - 1) /* Note that we refuse non-NUL-terminated sockets here
+ * when parsing, even though abstract namespace sockets
+ * explicitly allow embedded NUL bytes and don't consider
+ * them special. But it's simply annoying to debug such
+ * sockets. */
+ return -EINVAL;
+
+ a->sockaddr.un.sun_family = AF_UNIX;
+ /* sun_path[0] stays NUL; the name follows it and the size covers exactly both. */
+ memcpy(a->sockaddr.un.sun_path+1, s+1, l);
+ a->size = offsetof(struct sockaddr_un, sun_path) + 1 + l;
+
+ } else if (startswith(s, "vsock:")) {
+ /* AF_VSOCK socket in vsock:cid:port notation */
+ const char *cid_start = s + STRLEN("vsock:");
+ unsigned port;
+
+ e = strchr(cid_start, ':');
+ if (!e)
+ return -EINVAL;
+
+ r = safe_atou(e+1, &port);
+ if (r < 0)
+ return r;
+
+ n = strndup(cid_start, e - cid_start);
+ if (!n)
+ return -ENOMEM;
+
+ if (!isempty(n)) {
+ r = safe_atou(n, &a->sockaddr.vm.svm_cid);
+ if (r < 0)
+ return r;
+ } else
+ a->sockaddr.vm.svm_cid = VMADDR_CID_ANY;
+
+ a->sockaddr.vm.svm_family = AF_VSOCK;
+ a->sockaddr.vm.svm_port = port;
+ a->size = sizeof(struct sockaddr_vm);
+
+ } else {
+ uint16_t port;
+
+ e = strchr(s, ':');
+ if (e) {
+ r = parse_ip_port(e + 1, &port);
+ if (r < 0)
+ return r;
+
+ n = strndup(s, e-s);
+ if (!n)
+ return -ENOMEM;
+
+ /* IPv4 in w.x.y.z:p notation? */
+ r = inet_pton(AF_INET, n, &a->sockaddr.in.sin_addr);
+ if (r < 0)
+ return -errno;
+
+ if (r > 0) {
+ /* Gotcha, it's a traditional IPv4 address */
+ a->sockaddr.in.sin_family = AF_INET;
+ a->sockaddr.in.sin_port = htobe16(port);
+ a->size = sizeof(struct sockaddr_in);
+ } else {
+ unsigned idx;
+
+ if (strlen(n) > IF_NAMESIZE-1)
+ return -EINVAL;
+
+ /* Uh, our last resort, an interface name */
+ idx = if_nametoindex(n);
+ if (idx == 0)
+ return -EINVAL;
+
+ a->sockaddr.in6.sin6_family = AF_INET6;
+ a->sockaddr.in6.sin6_port = htobe16(port);
+ a->sockaddr.in6.sin6_scope_id = idx;
+ a->sockaddr.in6.sin6_addr = in6addr_any;
+ a->size = sizeof(struct sockaddr_in6);
+ }
+ } else {
+
+ /* Just a port */
+ r = parse_ip_port(s, &port);
+ if (r < 0)
+ return r;
+
+ if (socket_ipv6_is_supported()) {
+ a->sockaddr.in6.sin6_family = AF_INET6;
+ a->sockaddr.in6.sin6_port = htobe16(port);
+ a->sockaddr.in6.sin6_addr = in6addr_any;
+ a->size = sizeof(struct sockaddr_in6);
+ } else {
+ a->sockaddr.in.sin_family = AF_INET;
+ a->sockaddr.in.sin_port = htobe16(port);
+ a->sockaddr.in.sin_addr.s_addr = INADDR_ANY;
+ a->size = sizeof(struct sockaddr_in);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/* Like socket_address_parse(), but logs a warning and fails with -EAFNOSUPPORT for IPv6 addresses
+ * when IPv6 is not supported. *a is written only on success. */
+int socket_address_parse_and_warn(SocketAddress *a, const char *s) {
+        SocketAddress parsed;
+        int r = socket_address_parse(&parsed, s);
+
+        if (r < 0)
+                return r;
+
+        if (!socket_ipv6_is_supported() && parsed.sockaddr.sa.sa_family == AF_INET6) {
+                log_warning("Binding to IPv6 address not available since kernel does not support IPv6.");
+                return -EAFNOSUPPORT;
+        }
+
+        *a = parsed;
+        return 0;
+}
+
+/* Parses "<family> [<group>]" into an AF_NETLINK address of type SOCK_RAW: the family name becomes
+ * the socket protocol, the optional number the multicast group mask (0 if omitted). *a is zeroed and
+ * its type set before parsing starts. Returns 0 on success, a negative errno-style value otherwise. */
+int socket_address_parse_netlink(SocketAddress *a, const char *s) {
+        _cleanup_free_ char *family_name = NULL;
+        unsigned group = 0;
+        int protocol;
+
+        assert(a);
+        assert(s);
+
+        zero(*a);
+        a->type = SOCK_RAW;
+
+        errno = 0;
+        if (sscanf(s, "%ms %u", &family_name, &group) < 1)
+                return errno > 0 ? -errno : -EINVAL;
+
+        protocol = netlink_family_from_string(family_name);
+        if (protocol < 0)
+                return -EINVAL;
+
+        a->sockaddr.nl.nl_family = AF_NETLINK;
+        a->sockaddr.nl.nl_groups = group;
+        a->size = sizeof(struct sockaddr_nl);
+        a->protocol = protocol;
+
+        return 0;
+}
+
+/* Checks that *a is a well-formed address of a supported family: the size must fit the family, IP
+ * ports must be non-zero, and the socket type must be one that makes sense for the family. Returns 0
+ * if valid, -EINVAL if not, -EAFNOSUPPORT for unknown families.
+ *
+ * With 'strict' we enforce additional sanity constraints which are not set by the standard, but
+ * should only apply to sockets we create ourselves. */
+int socket_address_verify(const SocketAddress *a, bool strict) {
+        assert(a);
+
+        switch (socket_address_family(a)) {
+
+        case AF_INET:
+                if (a->size != sizeof(struct sockaddr_in))
+                        return -EINVAL;
+
+                if (a->sockaddr.in.sin_port == 0)
+                        return -EINVAL;
+
+                if (!IN_SET(a->type, SOCK_STREAM, SOCK_DGRAM))
+                        return -EINVAL;
+
+                return 0;
+
+        case AF_INET6:
+                if (a->size != sizeof(struct sockaddr_in6))
+                        return -EINVAL;
+
+                if (a->sockaddr.in6.sin6_port == 0)
+                        return -EINVAL;
+
+                if (!IN_SET(a->type, SOCK_STREAM, SOCK_DGRAM))
+                        return -EINVAL;
+
+                return 0;
+
+        case AF_UNIX:
+                if (a->size < offsetof(struct sockaddr_un, sun_path))
+                        return -EINVAL;
+                if (a->size > sizeof(struct sockaddr_un) + !strict)
+                        /* If !strict, allow one extra byte, since getsockname() on Linux will append
+                         * a NUL byte if we have path sockets that are above sun_path's full size. */
+                        return -EINVAL;
+
+                if (a->size > offsetof(struct sockaddr_un, sun_path) &&
+                    a->sockaddr.un.sun_path[0] != 0 &&
+                    strict) {
+                        /* Only validate file system sockets here, and only in strict mode */
+                        const char *e;
+
+                        e = memchr(a->sockaddr.un.sun_path, 0, sizeof(a->sockaddr.un.sun_path));
+                        if (e) {
+                                /* If there's an embedded NUL byte, make sure the size of the socket address matches it */
+                                if (a->size != offsetof(struct sockaddr_un, sun_path) + (e - a->sockaddr.un.sun_path) + 1)
+                                        return -EINVAL;
+                        } else {
+                                /* If there's no embedded NUL byte, then the size needs to match the whole
+                                 * structure or the structure with one extra NUL byte suffixed. (Yeah, Linux is awful,
+                                 * and considers both equivalent: getsockname() even extends sockaddr_un beyond its
+                                 * size if the path is non NUL terminated.) Note that the comparison is against the
+                                 * size of the full sockaddr_un, not merely of its sun_path member, since a->size
+                                 * counts the address family field too. */
+                                if (!IN_SET(a->size, sizeof(struct sockaddr_un), sizeof(struct sockaddr_un)+1))
+                                        return -EINVAL;
+                        }
+                }
+
+                if (!IN_SET(a->type, SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET))
+                        return -EINVAL;
+
+                return 0;
+
+        case AF_NETLINK:
+
+                if (a->size != sizeof(struct sockaddr_nl))
+                        return -EINVAL;
+
+                if (!IN_SET(a->type, SOCK_RAW, SOCK_DGRAM))
+                        return -EINVAL;
+
+                return 0;
+
+        case AF_VSOCK:
+                if (a->size != sizeof(struct sockaddr_vm))
+                        return -EINVAL;
+
+                if (!IN_SET(a->type, SOCK_STREAM, SOCK_DGRAM))
+                        return -EINVAL;
+
+                return 0;
+
+        default:
+                return -EAFNOSUPPORT;
+        }
+}
+
+/* Formats *a as a newly allocated string in *ret. AF_NETLINK addresses are printed as
+ * "<family> <groups>", everything else is delegated to sockaddr_pretty(). Returns 0 on success, a
+ * negative errno-style value otherwise. */
+int socket_address_print(const SocketAddress *a, char **ret) {
+        _cleanup_free_ char *sfamily = NULL;
+        int r;
+
+        assert(a);
+        assert(ret);
+
+        /* We do non-strict validation, because we want to be able to pretty-print any socket the
+         * kernel considers valid. We still need to do validation to know if we can meaningfully
+         * print the address. */
+        r = socket_address_verify(a, false);
+        if (r < 0)
+                return r;
+
+        if (socket_address_family(a) != AF_NETLINK)
+                return sockaddr_pretty(&a->sockaddr.sa, a->size, false, true, ret);
+
+        r = netlink_family_to_string_alloc(a->protocol, &sfamily);
+        if (r < 0)
+                return r;
+
+        if (asprintf(ret, "%s %u", sfamily, a->sockaddr.nl.nl_groups) < 0)
+                return -ENOMEM;
+
+        return 0;
+}
+
+/* Returns true for the socket types SOCK_STREAM and SOCK_SEQPACKET. */
+bool socket_address_can_accept(const SocketAddress *a) {
+        assert(a);
+
+        switch (a->type) {
+
+        case SOCK_STREAM:
+        case SOCK_SEQPACKET:
+                return true;
+
+        default:
+                return false;
+        }
+}
+
+/* Compares two socket addresses. Both must pass non-strict verification, and socket type and address
+ * family must match. The remaining comparison is family specific: address and port for IP, path
+ * identity or the exact abstract name for AF_UNIX, protocol and group mask for AF_NETLINK, CID and
+ * port for AF_VSOCK. Addresses of any other family are never considered equal. */
+bool socket_address_equal(const SocketAddress *a, const SocketAddress *b) {
+        assert(a);
+        assert(b);
+
+        /* Invalid addresses are unequal to all */
+        if (socket_address_verify(a, false) < 0 ||
+            socket_address_verify(b, false) < 0)
+                return false;
+
+        if (a->type != b->type)
+                return false;
+
+        if (socket_address_family(a) != socket_address_family(b))
+                return false;
+
+        switch (socket_address_family(a)) {
+
+        case AF_INET:
+                if (a->sockaddr.in.sin_addr.s_addr != b->sockaddr.in.sin_addr.s_addr)
+                        return false;
+
+                if (a->sockaddr.in.sin_port != b->sockaddr.in.sin_port)
+                        return false;
+
+                break;
+
+        case AF_INET6:
+                if (memcmp(&a->sockaddr.in6.sin6_addr, &b->sockaddr.in6.sin6_addr, sizeof(a->sockaddr.in6.sin6_addr)) != 0)
+                        return false;
+
+                if (a->sockaddr.in6.sin6_port != b->sockaddr.in6.sin6_port)
+                        return false;
+
+                break;
+
+        case AF_UNIX:
+                /* Unnamed sockets (no path bytes at all) never compare equal */
+                if (a->size <= offsetof(struct sockaddr_un, sun_path) ||
+                    b->size <= offsetof(struct sockaddr_un, sun_path))
+                        return false;
+
+                /* A file system socket is never equal to an abstract one */
+                if ((a->sockaddr.un.sun_path[0] == 0) != (b->sockaddr.un.sun_path[0] == 0))
+                        return false;
+
+                if (a->sockaddr.un.sun_path[0]) {
+                        if (!path_equal_or_files_same(a->sockaddr.un.sun_path, b->sockaddr.un.sun_path, 0))
+                                return false;
+                } else {
+                        if (a->size != b->size)
+                                return false;
+
+                        /* a->size counts from the start of the structure, while the comparison starts at
+                         * sun_path. Hence subtract the offset, so that only the bytes that actually belong
+                         * to the abstract name are compared and nothing past the address is looked at. */
+                        if (memcmp(a->sockaddr.un.sun_path, b->sockaddr.un.sun_path,
+                                   a->size - offsetof(struct sockaddr_un, sun_path)) != 0)
+                                return false;
+                }
+
+                break;
+
+        case AF_NETLINK:
+                if (a->protocol != b->protocol)
+                        return false;
+
+                if (a->sockaddr.nl.nl_groups != b->sockaddr.nl.nl_groups)
+                        return false;
+
+                break;
+
+        case AF_VSOCK:
+                if (a->sockaddr.vm.svm_cid != b->sockaddr.vm.svm_cid)
+                        return false;
+
+                if (a->sockaddr.vm.svm_port != b->sockaddr.vm.svm_port)
+                        return false;
+
+                break;
+
+        default:
+                /* Cannot compare, so we assume the addresses are different */
+                return false;
+        }
+
+        return true;
+}
+
+/* Returns true if *a equals the address obtained by parsing s with socket_address_parse() and
+ * giving it the specified socket type. A string that cannot be parsed never matches. */
+bool socket_address_is(const SocketAddress *a, const char *s, int type) {
+        struct SocketAddress parsed;
+
+        assert(a);
+        assert(s);
+
+        if (socket_address_parse(&parsed, s) >= 0) {
+                parsed.type = type;
+                return socket_address_equal(a, &parsed);
+        }
+
+        return false;
+}
+
+/* Returns true if *a equals the netlink address obtained by parsing s with
+ * socket_address_parse_netlink(). A string that cannot be parsed never matches. */
+bool socket_address_is_netlink(const SocketAddress *a, const char *s) {
+        struct SocketAddress parsed;
+
+        assert(a);
+        assert(s);
+
+        return socket_address_parse_netlink(&parsed, s) >= 0 &&
+                socket_address_equal(a, &parsed);
+}
+
+/* Returns the file system path of an AF_UNIX address, or NULL if the address is of a different
+ * family or its path starts with a NUL byte. The returned pointer refers into *a. */
+const char* socket_address_get_path(const SocketAddress *a) {
+        assert(a);
+
+        /* Note that handing out sun_path as a string is only safe because we know that there's an extra
+         * NUL byte after the sockaddr_un structure. On Linux AF_UNIX file system socket addresses don't
+         * have to be NUL terminated if they take up the full sun_path space. */
+        assert_cc(sizeof(union sockaddr_union) >= sizeof(struct sockaddr_un)+1);
+
+        if (socket_address_family(a) == AF_UNIX && a->sockaddr.un.sun_path[0] != 0)
+                return a->sockaddr.un.sun_path;
+
+        return NULL;
+}
+
+/* Reports whether IPv6 is available, judged by the existence of /proc/net/if_inet6. */
+bool socket_ipv6_is_supported(void) {
+        return access("/proc/net/if_inet6", F_OK) == 0;
+}
+
+/* Reports whether the socket 'fd' is bound to the address described by 'a': family, socket type, protocol
+ * (where it is meaningful) and the address itself must all agree. Any failure to query the socket is
+ * treated as "does not match". */
+bool socket_address_matches_fd(const SocketAddress *a, int fd) {
+        /* Zero-initialize, so that no field handed to socket_address_equal() below is ever indeterminate,
+         * and so that the bytes following whatever getsockname() fills in are NUL. */
+        SocketAddress b = {};
+        socklen_t solen;
+
+        assert(a);
+        assert(fd >= 0);
+
+        b.size = sizeof(b.sockaddr);
+        if (getsockname(fd, &b.sockaddr.sa, &b.size) < 0)
+                return false;
+
+        if (b.sockaddr.sa.sa_family != a->sockaddr.sa.sa_family)
+                return false;
+
+        solen = sizeof(b.type);
+        if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &b.type, &solen) < 0)
+                return false;
+
+        if (b.type != a->type)
+                return false;
+
+        /* A protocol of 0 normally means "unspecified", in which case we don't compare it. For AF_NETLINK
+         * however 0 is a real protocol (NETLINK_ROUTE) and socket_address_equal() compares the protocol
+         * field for that family, hence always query it there. */
+        if (a->protocol != 0 || a->sockaddr.sa.sa_family == AF_NETLINK) {
+                solen = sizeof(b.protocol);
+                if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &b.protocol, &solen) < 0)
+                        return false;
+
+                if (b.protocol != a->protocol)
+                        return false;
+        }
+
+        return socket_address_equal(a, &b);
+}
+
+/* Extracts the port of an AF_INET, AF_INET6 or AF_VSOCK socket address in host byte order. Returns 0 and
+ * stores the port in *ret_port on success, or -EAFNOSUPPORT (leaving *ret_port untouched) for any other
+ * address family. */
+int sockaddr_port(const struct sockaddr *_sa, unsigned *ret_port) {
+        const union sockaddr_union *sa = (const union sockaddr_union*) _sa;
+        unsigned port;
+
+        /* The port is returned as 'unsigned' rather than 'uint16_t', since AF_VSOCK knows larger ports */
+
+        assert(sa);
+
+        if (sa->sa.sa_family == AF_INET)
+                port = be16toh(sa->in.sin_port);
+        else if (sa->sa.sa_family == AF_INET6)
+                port = be16toh(sa->in6.sin6_port);
+        else if (sa->sa.sa_family == AF_VSOCK)
+                port = sa->vm.svm_port;
+        else
+                return -EAFNOSUPPORT;
+
+        *ret_port = port;
+        return 0;
+}
+
+/* Formats a socket address as a newly allocated, human readable string.
+ *
+ *   AF_INET   dotted quad, followed by ":PORT" if include_port is set
+ *   AF_INET6  if translate_ipv6 is set and the address starts with the IPv4-mapped prefix
+ *             (ten zero bytes, then 0xFF 0xFF) it is printed like AF_INET; otherwise the inet_ntop()
+ *             text is used, wrapped as "[ADDR]:PORT" if include_port is set
+ *   AF_UNIX   "<unnamed>" if there are no name bytes, "@" plus the escaped name for names starting
+ *             with a NUL byte, otherwise the escaped path (include_port is not consulted)
+ *   AF_VSOCK  "vsock:CID:PORT" ("vsock::PORT" for VMADDR_CID_ANY), or "vsock:CID" without port
+ *
+ * Returns 0 and stores the string in *ret on success, -ENOMEM if an allocation fails and -EOPNOTSUPP for
+ * any other address family. *ret is only written on success. */
+int sockaddr_pretty(
+ const struct sockaddr *_sa,
+ socklen_t salen,
+ bool translate_ipv6,
+ bool include_port,
+ char **ret) {
+
+ union sockaddr_union *sa = (union sockaddr_union*) _sa;
+ char *p;
+ int r;
+
+ assert(sa);
+ assert(salen >= sizeof(sa->sa.sa_family));
+
+ switch (sa->sa.sa_family) {
+
+ case AF_INET: {
+ uint32_t a;
+
+ a = be32toh(sa->in.sin_addr.s_addr);
+
+ if (include_port)
+ r = asprintf(&p,
+ "%u.%u.%u.%u:%u",
+ a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF,
+ be16toh(sa->in.sin_port));
+ else
+ r = asprintf(&p,
+ "%u.%u.%u.%u",
+ a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF);
+ if (r < 0)
+ return -ENOMEM;
+ break;
+ }
+
+ case AF_INET6: {
+ static const unsigned char ipv4_prefix[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF
+ };
+
+ if (translate_ipv6 &&
+ memcmp(&sa->in6.sin6_addr, ipv4_prefix, sizeof(ipv4_prefix)) == 0) {
+ const uint8_t *a = sa->in6.sin6_addr.s6_addr+12;
+ if (include_port)
+ r = asprintf(&p,
+ "%u.%u.%u.%u:%u",
+ a[0], a[1], a[2], a[3],
+ be16toh(sa->in6.sin6_port));
+ else
+ r = asprintf(&p,
+ "%u.%u.%u.%u",
+ a[0], a[1], a[2], a[3]);
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ char a[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6, &sa->in6.sin6_addr, a, sizeof(a));
+
+ if (include_port) {
+ r = asprintf(&p,
+ "[%s]:%u",
+ a,
+ be16toh(sa->in6.sin6_port));
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ p = strdup(a);
+ if (!p)
+ return -ENOMEM;
+ }
+ }
+
+ break;
+ }
+
+ case AF_UNIX:
+ if (salen <= offsetof(struct sockaddr_un, sun_path) ||
+ (sa->un.sun_path[0] == 0 && salen == offsetof(struct sockaddr_un, sun_path) + 1))
+ /* The name must have at least one character (and the leading NUL does not count) */
+ p = strdup("<unnamed>");
+ else {
+ /* Note that we calculate the path pointer here through the .un_buffer[] field, in order to
+ * outtrick bounds checking tools such as ubsan, which are too smart for their own good: on
+ * Linux the kernel may return sun_path[] data one byte longer than the declared size of the
+ * field. */
+ char *path = (char*) sa->un_buffer + offsetof(struct sockaddr_un, sun_path);
+ size_t path_len = salen - offsetof(struct sockaddr_un, sun_path);
+
+ if (path[0] == 0) {
+ /* Abstract socket. When parsing address information from strings, we
+ * explicitly reject overly long paths and paths with embedded NULs.
+ * But we might get such a socket from the outside. Let's return
+ * something meaningful and printable in this case. */
+
+ _cleanup_free_ char *e = NULL;
+
+ e = cescape_length(path + 1, path_len - 1);
+ if (!e)
+ return -ENOMEM;
+
+ p = strjoin("@", e);
+ } else {
+ if (path[path_len - 1] == '\0')
+ /* We expect a terminating NUL and don't print it */
+ path_len --;
+
+ p = cescape_length(path, path_len);
+ }
+ }
+ /* Common allocation check for all three AF_UNIX variants above */
+ if (!p)
+ return -ENOMEM;
+
+ break;
+
+ case AF_VSOCK:
+ if (include_port) {
+ if (sa->vm.svm_cid == VMADDR_CID_ANY)
+ r = asprintf(&p, "vsock::%u", sa->vm.svm_port);
+ else
+ r = asprintf(&p, "vsock:%u:%u", sa->vm.svm_cid, sa->vm.svm_port);
+ } else
+ r = asprintf(&p, "vsock:%u", sa->vm.svm_cid);
+ if (r < 0)
+ return -ENOMEM;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ *ret = p;
+ return 0;
+}
+
+/* Stores a newly allocated, human readable description of the peer of socket 'fd' in *ret. For AF_UNIX
+ * peers this is "PID …/UID …" taken from the peer credentials, for everything else the formatted peer
+ * address. Returns 0 on success, a negative errno-style value on failure. */
+int getpeername_pretty(int fd, bool include_port, char **ret) {
+        union sockaddr_union sa;
+        socklen_t salen = sizeof(sa);
+        struct ucred ucred = {};
+        int r;
+
+        assert(fd >= 0);
+        assert(ret);
+
+        if (getpeername(fd, &sa.sa, &salen) < 0)
+                return -errno;
+
+        /* For remote sockets we translate IPv6 addresses back to IPv4 if applicable, since that's nicer. */
+        if (sa.sa.sa_family != AF_UNIX)
+                return sockaddr_pretty(&sa.sa, salen, true, include_port, ret);
+
+        /* UNIX connection sockets are anonymous, so let's use PID/UID as pretty credentials instead */
+        r = getpeercred(fd, &ucred);
+        if (r < 0)
+                return r;
+
+        r = asprintf(ret, "PID "PID_FMT"/UID "UID_FMT, ucred.pid, ucred.uid);
+        if (r < 0)
+                return -ENOMEM;
+
+        return 0;
+}
+
+/* Stores a newly allocated, human readable rendering (including the port) of the local address of
+ * socket 'fd' in *ret. Returns 0 on success, a negative errno-style value on failure. */
+int getsockname_pretty(int fd, char **ret) {
+        union sockaddr_union local;
+        socklen_t local_len = sizeof(local);
+
+        assert(fd >= 0);
+        assert(ret);
+
+        if (getsockname(fd, &local.sa, &local_len) < 0)
+                return -errno;
+
+        /* For local sockets we do not translate IPv6 addresses back to IPv4, since this is usually used
+         * for listening sockets, where the difference between IPv4 and IPv6 matters. */
+        return sockaddr_pretty(&local.sa, local_len, false, true, ret);
+}
+
+/* Resolves the host name for socket address 'sa' via getnameinfo() and stores a newly allocated copy in
+ * *_ret. If the lookup fails, the formatted numeric address (with port) is stored instead and the
+ * failure is logged at debug level. Returns 0 on success, a negative errno-style value on failure. */
+int socknameinfo_pretty(union sockaddr_union *sa, socklen_t salen, char **_ret) {
+        char host[NI_MAXHOST], *ret;
+        int r;
+
+        assert(_ret);
+
+        r = getnameinfo(&sa->sa, salen, host, sizeof(host), NULL, 0, IDN_FLAGS);
+        if (r == 0) {
+                ret = strdup(host);
+                if (!ret)
+                        return -ENOMEM;
+        } else {
+                /* Remember errno right away, the formatting below might clobber it */
+                int saved_errno = errno;
+
+                r = sockaddr_pretty(&sa->sa, salen, true, true, &ret);
+                if (r < 0)
+                        return r;
+
+                log_debug_errno(saved_errno, "getnameinfo(%s) failed: %m", ret);
+        }
+
+        *_ret = ret;
+        return 0;
+}
+
+/* Names of the netlink protocols, indexed by their NETLINK_* constant. The macro invocation below
+ * presumably generates the netlink_family_to_string_alloc()/netlink_family_from_string() pair declared
+ * in socket-util.h, with INT_MAX as the limit for values without a name -- confirm against the macro's
+ * definition. */
+static const char* const netlink_family_table[] = {
+ [NETLINK_ROUTE] = "route",
+ [NETLINK_FIREWALL] = "firewall",
+ [NETLINK_INET_DIAG] = "inet-diag",
+ [NETLINK_NFLOG] = "nflog",
+ [NETLINK_XFRM] = "xfrm",
+ [NETLINK_SELINUX] = "selinux",
+ [NETLINK_ISCSI] = "iscsi",
+ [NETLINK_AUDIT] = "audit",
+ [NETLINK_FIB_LOOKUP] = "fib-lookup",
+ [NETLINK_CONNECTOR] = "connector",
+ [NETLINK_NETFILTER] = "netfilter",
+ [NETLINK_IP6_FW] = "ip6-fw",
+ [NETLINK_DNRTMSG] = "dnrtmsg",
+ [NETLINK_KOBJECT_UEVENT] = "kobject-uevent",
+ [NETLINK_GENERIC] = "generic",
+ [NETLINK_SCSITRANSPORT] = "scsitransport",
+ [NETLINK_ECRYPTFS] = "ecryptfs",
+ [NETLINK_RDMA] = "rdma",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(netlink_family, int, INT_MAX);
+
+/* Textual names of the SocketAddressBindIPv6Only values. The macro invocation below presumably generates
+ * the socket_address_bind_ipv6_only_to_string()/_from_string() pair declared in socket-util.h -- confirm
+ * against the macro's definition. */
+static const char* const socket_address_bind_ipv6_only_table[_SOCKET_ADDRESS_BIND_IPV6_ONLY_MAX] = {
+ [SOCKET_ADDRESS_DEFAULT] = "default",
+ [SOCKET_ADDRESS_BOTH] = "both",
+ [SOCKET_ADDRESS_IPV6_ONLY] = "ipv6-only"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_address_bind_ipv6_only, SocketAddressBindIPv6Only);
+
+/* Like socket_address_bind_ipv6_only_from_string(), but additionally accepts anything parse_boolean()
+ * understands: true maps to SOCKET_ADDRESS_IPV6_ONLY, false to SOCKET_ADDRESS_BOTH. */
+SocketAddressBindIPv6Only socket_address_bind_ipv6_only_or_bool_from_string(const char *n) {
+        int b;
+
+        b = parse_boolean(n);
+        if (b < 0)
+                /* Not a boolean, hence try the symbolic names */
+                return socket_address_bind_ipv6_only_from_string(n);
+
+        return b > 0 ? SOCKET_ADDRESS_IPV6_ONLY : SOCKET_ADDRESS_BOTH;
+}
+
+/* Compares the host part of two socket addresses: the IPv4 address, the IPv6 address or the vsock CID.
+ * Ports are not compared. Addresses of different or of any other families never compare equal. */
+bool sockaddr_equal(const union sockaddr_union *a, const union sockaddr_union *b) {
+        assert(a);
+        assert(b);
+
+        if (a->sa.sa_family != b->sa.sa_family)
+                return false;
+
+        switch (a->sa.sa_family) {
+
+        case AF_INET:
+                return a->in.sin_addr.s_addr == b->in.sin_addr.s_addr;
+
+        case AF_INET6:
+                return memcmp(&a->in6.sin6_addr, &b->in6.sin6_addr, sizeof(a->in6.sin6_addr)) == 0;
+
+        case AF_VSOCK:
+                return a->vm.svm_cid == b->vm.svm_cid;
+
+        default:
+                return false;
+        }
+}
+
+/* Tries to raise the send buffer of socket 'fd' to 'n' bytes.
+ *
+ * Returns 0 if the buffer already is large enough, 1 if an attempt to enlarge it was made, -ERANGE if
+ * 'n' does not fit into the 'int' the socket options take, or another negative errno-style value if the
+ * buffer size could not be changed at all.
+ *
+ * Note that the kernel reports twice the value that was set, hence the comparisons against n*2. */
+int fd_inc_sndbuf(int fd, size_t n) {
+        int r, q, value;
+        socklen_t l = sizeof(value);
+
+        /* The socket options take an 'int', refuse anything that would be truncated on the way */
+        if (n > INT_MAX)
+                return -ERANGE;
+
+        r = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &value, &l);
+        if (r >= 0 && l == sizeof(value) && (size_t) value >= n*2)
+                return 0;
+
+        /* First, try the unprivileged option. Note that this does not fail if the request exceeds the
+         * kernel limit, the size is silently capped instead. Hence read the size back to find out whether
+         * we actually got what we asked for. */
+        r = setsockopt_int(fd, SOL_SOCKET, SO_SNDBUF, (int) n);
+        if (r >= 0) {
+                l = sizeof(value);
+                if (getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &value, &l) >= 0 &&
+                    l == sizeof(value) && (size_t) value >= n*2)
+                        return 1;
+        }
+
+        /* If we have the privileges we will ignore the kernel limit. */
+        q = setsockopt_int(fd, SOL_SOCKET, SO_SNDBUFFORCE, (int) n);
+        if (q < 0)
+                /* If the unprivileged attempt went through, the buffer was at least raised as far as the
+                 * limit permits; keep reporting that as success, as we always did. */
+                return r >= 0 ? 1 : q;
+
+        return 1;
+}
+
+/* Tries to raise the receive buffer of socket 'fd' to 'n' bytes.
+ *
+ * Returns 0 if the buffer already is large enough, 1 if an attempt to enlarge it was made, -ERANGE if
+ * 'n' does not fit into the 'int' the socket options take, or another negative errno-style value if the
+ * buffer size could not be changed at all.
+ *
+ * Note that the kernel reports twice the value that was set, hence the comparisons against n*2. */
+int fd_inc_rcvbuf(int fd, size_t n) {
+        int r, q, value;
+        socklen_t l = sizeof(value);
+
+        /* The socket options take an 'int', refuse anything that would be truncated on the way */
+        if (n > INT_MAX)
+                return -ERANGE;
+
+        r = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &value, &l);
+        if (r >= 0 && l == sizeof(value) && (size_t) value >= n*2)
+                return 0;
+
+        /* First, try the unprivileged option. Note that this does not fail if the request exceeds the
+         * kernel limit, the size is silently capped instead. Hence read the size back to find out whether
+         * we actually got what we asked for. */
+        r = setsockopt_int(fd, SOL_SOCKET, SO_RCVBUF, (int) n);
+        if (r >= 0) {
+                l = sizeof(value);
+                if (getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &value, &l) >= 0 &&
+                    l == sizeof(value) && (size_t) value >= n*2)
+                        return 1;
+        }
+
+        /* If we have the privileges we will ignore the kernel limit. */
+        q = setsockopt_int(fd, SOL_SOCKET, SO_RCVBUFFORCE, (int) n);
+        if (q < 0)
+                /* If the unprivileged attempt went through, the buffer was at least raised as far as the
+                 * limit permits; keep reporting that as success, as we always did. */
+                return r >= 0 ? 1 : q;
+
+        return 1;
+}
+
+/* Names of the IP type-of-service values, indexed by their IPTOS_* constant. The macro invocation below
+ * presumably generates the ip_tos_to_string_alloc()/ip_tos_from_string() pair declared in socket-util.h,
+ * with 0xff as the limit for values without a name -- confirm against the macro's definition. */
+static const char* const ip_tos_table[] = {
+ [IPTOS_LOWDELAY] = "low-delay",
+ [IPTOS_THROUGHPUT] = "throughput",
+ [IPTOS_RELIABILITY] = "reliability",
+ [IPTOS_LOWCOST] = "low-cost",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ip_tos, int, 0xff);
+
+/* Checks whether a network interface name is valid. This is inspired by dev_valid_name() in the kernel
+ * sources but slightly stricter: only non-control, non-space ASCII characters are allowed, ':' and '/'
+ * are refused, and so are names that consist only of digits, to avoid confusion with numeric interface
+ * indexes. */
+bool ifname_valid(const char *p) {
+        bool only_digits = true;
+        const char *c;
+
+        if (isempty(p) || strlen(p) >= IFNAMSIZ || dot_or_dot_dot(p))
+                return false;
+
+        for (c = p; *c; c++) {
+                unsigned char u = (unsigned char) *c;
+
+                /* Refuse control characters, space, DEL and everything outside of 7-bit ASCII */
+                if (u >= 127U || u <= 32U)
+                        return false;
+
+                if (IN_SET(*c, ':', '/'))
+                        return false;
+
+                if (*c < '0' || *c > '9')
+                        only_digits = false;
+        }
+
+        return !only_digits;
+}
+
+/* Checks whether 'p' is acceptable as an address label: non-empty, shorter than IFNAMSIZ and made up
+ * exclusively of bytes in the range 32…126 (i.e. printable ASCII, space included). */
+bool address_label_valid(const char *p) {
+        const char *c;
+
+        if (isempty(p) || strlen(p) >= IFNAMSIZ)
+                return false;
+
+        for (c = p; *c; c++) {
+                uint8_t u = (uint8_t) *c;
+
+                if (u >= 127U || u <= 31U)
+                        return false;
+        }
+
+        return true;
+}
+
+/* Queries the SO_PEERCRED credentials of the peer of socket 'fd'. Returns 0 and fills in *ucred on
+ * success, -EIO if the kernel returned data of unexpected size, -ENODATA if the returned PID is not
+ * valid, or a negative errno if the query itself failed. *ucred is only written on success. */
+int getpeercred(int fd, struct ucred *ucred) {
+        struct ucred u;
+        socklen_t n = sizeof(struct ucred);
+
+        assert(fd >= 0);
+        assert(ucred);
+
+        if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &u, &n) < 0)
+                return -errno;
+
+        if (n != sizeof(struct ucred))
+                return -EIO;
+
+        /* Check if the data is actually useful and not suppressed due to namespacing issues */
+        if (!pid_is_valid(u.pid))
+                return -ENODATA;
+
+        /* The UID/GID are deliberately not checked: namespace translation works differently for them,
+         * instead of an "invalid" user/group we'd receive the overflow UID/GID. */
+
+        *ucred = u;
+        return 0;
+}
+
+/* Queries the SO_PEERSEC security label of the peer of socket 'fd' and stores it as a newly allocated
+ * string in *ret. Returns 0 on success, -EOPNOTSUPP if the label is empty, -ENOMEM on allocation failure
+ * or a negative errno if the query failed. */
+int getpeersec(int fd, char **ret) {
+        _cleanup_free_ char *label = NULL;
+        socklen_t n = 64;
+
+        assert(fd >= 0);
+        assert(ret);
+
+        while (true) {
+                int r;
+
+                /* One extra, zeroed byte, so that the result is always NUL terminated */
+                label = new0(char, n+1);
+                if (!label)
+                        return -ENOMEM;
+
+                r = getsockopt(fd, SOL_SOCKET, SO_PEERSEC, label, &n);
+                if (r >= 0)
+                        break;
+
+                if (errno != ERANGE)
+                        return -errno;
+
+                /* The buffer was too small; 'n' was passed by pointer, retry with whatever it holds now */
+                label = mfree(label);
+        }
+
+        if (isempty(label))
+                return -EOPNOTSUPP;
+
+        *ret = TAKE_PTR(label);
+
+        return 0;
+}
+
+/* Queries the SO_PEERGROUPS group list of the peer of socket 'fd' and stores it as a newly allocated
+ * array in *ret. Returns the number of entries on success, -E2BIG if that number does not fit into an
+ * 'int', -ENOMEM on allocation failure or a negative errno if the query failed. */
+int getpeergroups(int fd, gid_t **ret) {
+        _cleanup_free_ gid_t *groups = NULL;
+        socklen_t n = sizeof(gid_t) * 64;
+
+        assert(fd >= 0);
+        assert(ret);
+
+        while (true) {
+                int r;
+
+                groups = malloc(n);
+                if (!groups)
+                        return -ENOMEM;
+
+                r = getsockopt(fd, SOL_SOCKET, SO_PEERGROUPS, groups, &n);
+                if (r >= 0)
+                        break;
+
+                if (errno != ERANGE)
+                        return -errno;
+
+                /* The buffer was too small; 'n' was passed by pointer, retry with whatever it holds now */
+                groups = mfree(groups);
+        }
+
+        /* Convert the size in bytes into a number of entries */
+        assert_se(n % sizeof(gid_t) == 0);
+        n /= sizeof(gid_t);
+
+        /* Make sure the count survives the conversion to the 'int' we return */
+        if ((socklen_t) (int) n != n)
+                return -E2BIG;
+
+        *ret = TAKE_PTR(groups);
+
+        return (int) n;
+}
+
+/* Sends the payload described by iov/iovlen and/or the file descriptor 'fd' (as SCM_RIGHTS control
+ * message) over 'transport_fd', optionally to the destination address sa/len. Pass fd < 0 to send data
+ * only. MSG_NOSIGNAL is always added to 'flags'. Returns what sendmsg() returned on success, -EINVAL if
+ * there is neither a descriptor nor an iovec, or a negative errno if sending failed. */
+ssize_t send_one_fd_iov_sa(
+                int transport_fd,
+                int fd,
+                struct iovec *iov, size_t iovlen,
+                const struct sockaddr *sa, socklen_t len,
+                int flags) {
+
+        union {
+                struct cmsghdr cmsghdr;
+                uint8_t buf[CMSG_SPACE(sizeof(int))];
+        } control = {};
+        struct msghdr mh = {
+                .msg_name = (struct sockaddr*) sa,
+                .msg_namelen = len,
+                .msg_iov = iov,
+                .msg_iovlen = iovlen,
+        };
+        ssize_t sent;
+
+        assert(transport_fd >= 0);
+
+        /* There has to be something to transmit: a descriptor, a payload, or both. */
+        if (fd < 0 && !iov)
+                return -EINVAL;
+
+        if (fd >= 0) {
+                struct cmsghdr *hdr;
+
+                /* Attach the control buffer before asking for its first header */
+                mh.msg_control = &control;
+                mh.msg_controllen = sizeof(control);
+
+                hdr = CMSG_FIRSTHDR(&mh);
+                hdr->cmsg_len = CMSG_LEN(sizeof(int));
+                hdr->cmsg_type = SCM_RIGHTS;
+                hdr->cmsg_level = SOL_SOCKET;
+                memcpy(CMSG_DATA(hdr), &fd, sizeof(int));
+
+                mh.msg_controllen = CMSG_SPACE(sizeof(int));
+        }
+
+        sent = sendmsg(transport_fd, &mh, MSG_NOSIGNAL | flags);
+        return sent < 0 ? (ssize_t) -errno : sent;
+}
+
+/* Sends the single file descriptor 'fd', without any payload, over 'transport_fd' to the destination
+ * sa/len. Returns the result of send_one_fd_iov_sa() converted to 'int'. */
+int send_one_fd_sa(
+                int transport_fd,
+                int fd,
+                const struct sockaddr *sa, socklen_t len,
+                int flags) {
+
+        ssize_t k;
+
+        assert(fd >= 0);
+
+        k = send_one_fd_iov_sa(transport_fd, fd, NULL, 0, sa, len, flags);
+        return (int) k;
+}
+
+/* Receives one message from 'transport_fd' into iov/iovlen, together with at most a single file
+ * descriptor. MSG_CMSG_CLOEXEC is always added to 'flags'. On success returns what recvmsg() returned
+ * and stores the received descriptor, or -1 if there was none, in *ret_fd. Returns -EIO if neither data
+ * nor a descriptor arrived, or a negative errno if receiving failed. */
+ssize_t receive_one_fd_iov(
+                int transport_fd,
+                struct iovec *iov, size_t iovlen,
+                int flags,
+                int *ret_fd) {
+
+        union {
+                struct cmsghdr cmsghdr;
+                uint8_t buf[CMSG_SPACE(sizeof(int))];
+        } control = {};
+        struct msghdr mh = {
+                .msg_control = &control,
+                .msg_controllen = sizeof(control),
+                .msg_iov = iov,
+                .msg_iovlen = iovlen,
+        };
+        struct cmsghdr *found;
+        ssize_t k;
+
+        assert(transport_fd >= 0);
+        assert(ret_fd);
+
+        /*
+         * The type of the transport does not matter to us. Since we retrieve a single FD at most, senders
+         * on packet-based transports must make sure to pass only a single FD per packet. This is best
+         * used in combination with send_one_fd().
+         */
+
+        k = recvmsg(transport_fd, &mh, MSG_CMSG_CLOEXEC | flags);
+        if (k < 0)
+                return (ssize_t) -errno;
+
+        /* Look for the first control message that carries exactly one file descriptor */
+        found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int)));
+        if (!found) {
+                cmsg_close_all(&mh);
+
+                /* If didn't receive an FD or any data, return an error. */
+                if (k == 0)
+                        return -EIO;
+
+                *ret_fd = -1;
+        } else
+                *ret_fd = *(int*) CMSG_DATA(found);
+
+        return k;
+}
+
+/* Receives a single file descriptor, without any payload, from 'transport_fd'. Returns the descriptor
+ * on success or a negative errno-style value on failure. */
+int receive_one_fd(int transport_fd, int flags) {
+        ssize_t k;
+        int fd;
+
+        k = receive_one_fd_iov(transport_fd, NULL, 0, flags, &fd);
+        if (k != 0) {
+                /* No iovec was passed, hence no data can have been received and a positive result is
+                 * impossible: what's left must be an error. */
+                assert(k < 0);
+                return (int) k;
+        }
+
+        return fd;
+}
+
+/* Returns the size of the next datagram queued on socket 'fd', or a negative errno on failure. */
+ssize_t next_datagram_size_fd(int fd) {
+        ssize_t l;
+        int k = 0;
+
+        /* This is a bit like FIONREAD/SIOCINQ, however a bit more powerful. The difference being:
+         * recv(MSG_PEEK) will actually cause the next datagram in the queue to be validated regarding
+         * checksums, which FIONREAD doesn't do. This matters a lot, as the size returned here has to
+         * match what a subsequent recvmsg() will read, since otherwise callers might end up allocating a
+         * buffer of the wrong size. */
+
+        l = recv(fd, NULL, 0, MSG_PEEK|MSG_TRUNC);
+        if (l > 0)
+                return l;
+
+        if (l < 0 && !IN_SET(errno, EOPNOTSUPP, EFAULT))
+                return -errno;
+
+        /* Some sockets (AF_PACKET) do not support null-sized recv() with MSG_TRUNC set, let's fall back to
+         * FIONREAD for them (and whenever a size of zero was reported). Checksums don't matter for raw
+         * sockets anyway, hence this should be fine. */
+
+        if (ioctl(fd, FIONREAD, &k) < 0)
+                return -errno;
+
+        return (ssize_t) k;
+}
+
+/* Similar to flush_fd(), but flushes all incoming connections of listening socket 'fd' by accepting and
+ * immediately closing them. Returns 0 once no further connection is pending, or a negative errno on
+ * failure. */
+int flush_accept(int fd) {
+
+        struct pollfd pollfd = {
+                .fd = fd,
+                .events = POLLIN,
+        };
+
+        for (;;) {
+                int r, cfd;
+
+                /* Zero timeout: only check whether something is pending, never wait for it */
+                r = poll(&pollfd, 1, 0);
+                if (r < 0) {
+                        if (errno == EINTR)
+                                continue;
+
+                        return -errno;
+                }
+                if (r == 0)
+                        return 0;
+
+                cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+                if (cfd >= 0) {
+                        close(cfd);
+                        continue;
+                }
+
+                if (errno == EINTR)
+                        continue;
+
+                if (errno == EAGAIN)
+                        return 0;
+
+                return -errno;
+        }
+}
+
+/* Returns the first control message of 'mh' with the given level and type whose cmsg_len equals
+ * 'length', or NULL if there is none. Pass (socklen_t) -1 as 'length' to accept any length. */
+struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length) {
+        struct cmsghdr *cmsg;
+
+        assert(mh);
+
+        for (cmsg = CMSG_FIRSTHDR(mh); cmsg; cmsg = CMSG_NXTHDR(mh, cmsg)) {
+                if (cmsg->cmsg_level != level || cmsg->cmsg_type != type)
+                        continue;
+
+                if (length == (socklen_t) -1 || length == cmsg->cmsg_len)
+                        return cmsg;
+        }
+
+        return NULL;
+}
+
+/* Creates a socket suitable for invoking the various network interface ioctl()s on. Returns the new
+ * file descriptor on success or a negative errno on failure. */
+int socket_ioctl_fd(void) {
+        int fd;
+
+        /* Traditionally only AF_INET was good for this, since kernel 4.6 AF_NETLINK works too. Hence try
+         * AF_INET first, and if that's not available (for example, because it is made unavailable via
+         * SECCOMP or such), fall back to the more generic AF_NETLINK. */
+
+        fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC, 0);
+        if (fd >= 0)
+                return fd;
+
+        fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_GENERIC);
+        if (fd < 0)
+                return -errno;
+
+        return fd;
+}
+
+/* Removes the file system node an AF_UNIX socket address refers to. Returns 1 if the node was unlinked,
+ * 0 if sun_path starts with a NUL byte (nothing to remove), -EPROTOTYPE if the address is not AF_UNIX, or
+ * a negative errno if unlink() failed. */
+int sockaddr_un_unlink(const struct sockaddr_un *sa) {
+        const char *p;
+
+        assert(sa);
+
+        if (sa->sun_family != AF_UNIX)
+                return -EPROTOTYPE;
+
+        if (sa->sun_path[0] == 0) /* Nothing to do for abstract sockets */
+                return 0;
+
+        /* The path in .sun_path is not necessarily NUL terminated. If it isn't, operate on a terminated
+         * copy instead. */
+        if (memchr(sa->sun_path, 0, sizeof(sa->sun_path)))
+                p = sa->sun_path;
+        else
+                p = memdupa_suffix0(sa->sun_path, sizeof(sa->sun_path));
+
+        if (unlink(p) < 0)
+                return -errno;
+
+        return 1;
+}
+
+/* Initializes *ret as AF_UNIX socket address from 'path'. Paths starting with '@' are taken as abstract
+ * namespace sockets, paths starting with '/' as regular file system sockets. Nothing else is accepted
+ * (i.e. no relative paths), to avoid ambiguities. Note that names in the abstract namespace that contain
+ * NUL bytes cannot be expressed this way.
+ *
+ * Returns the size of the address to pass to the kernel on success, or -EINVAL if the path is empty,
+ * consists of the prefix character only, starts with anything else, or is too long. *ret is only written
+ * on success. */
+int sockaddr_un_set_path(struct sockaddr_un *ret, const char *path) {
+        size_t l;
+
+        assert(ret);
+        assert(path);
+
+        l = strlen(path);
+
+        /* We need the prefix character plus at least one more character */
+        if (l < 2 || !IN_SET(path[0], '/', '@'))
+                return -EINVAL;
+
+        /* Don't allow paths larger than the space in sockaddr_un. Note that we are a tiny bit more
+         * restrictive than the kernel is: we insist on NUL termination (both for abstract namespace and
+         * regular file system socket addresses!), which the kernel doesn't. We do this to reduce the
+         * chance of incompatibility with other apps that do not expect non-NUL terminated file system
+         * paths. */
+        if (l+1 > sizeof(ret->sun_path))
+                return -EINVAL;
+
+        *ret = (struct sockaddr_un) {
+                .sun_family = AF_UNIX,
+        };
+
+        if (path[0] == '/') {
+                /* File system socket */
+                memcpy(ret->sun_path, path, l + 1); /* copy *with* trailing NUL byte */
+                return (int) (offsetof(struct sockaddr_un, sun_path) + l + 1); /* include trailing NUL in size */
+        }
+
+        /* Abstract namespace socket: the '@' is replaced by the NUL byte already in sun_path[0] */
+        memcpy(ret->sun_path + 1, path + 1, l); /* copy *with* trailing NUL byte */
+        return (int) (offsetof(struct sockaddr_un, sun_path) + l); /* 🔥 *don't* 🔥 include trailing NUL in size */
+}
diff --git a/src/basic/socket-util.h b/src/basic/socket-util.h
new file mode 100644
index 0000000..574d2b7
--- /dev/null
+++ b/src/basic/socket-util.h
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <linux/netlink.h>
+#include <linux/if_infiniband.h>
+#include <linux/if_packet.h>
+#include <netinet/ether.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "macro.h"
+#include "missing_socket.h"
+#include "sparse-endian.h"
+
+union sockaddr_union {
+ /* The minimal, abstract version */
+ struct sockaddr sa;
+
+ /* The libc provided version that allocates "enough room" for every protocol */
+ struct sockaddr_storage storage;
+
+ /* Protocol-specific implementations */
+ struct sockaddr_in in;
+ struct sockaddr_in6 in6;
+ struct sockaddr_un un;
+ struct sockaddr_nl nl;
+ struct sockaddr_ll ll;
+ struct sockaddr_vm vm;
+
+ /* Ensure there is enough space to store Infiniband addresses */
+ uint8_t ll_buffer[offsetof(struct sockaddr_ll, sll_addr) + CONST_MAX(ETH_ALEN, INFINIBAND_ALEN)];
+
+ /* Ensure there is enough space after the AF_UNIX sun_path for one more NUL byte, just to be sure that the path
+ * component is always followed by at least one NUL byte. */
+ uint8_t un_buffer[sizeof(struct sockaddr_un) + 1];
+};
+
+/* A socket address of any supported family, bundled with its size, socket type and protocol. */
+typedef struct SocketAddress {
+ union sockaddr_union sockaddr;
+
+ /* We store the size here explicitly due to the weird
+ * sockaddr_un semantics for abstract sockets */
+ socklen_t size;
+
+ /* Socket type, i.e. SOCK_STREAM, SOCK_DGRAM, ... */
+ int type;
+
+ /* Socket protocol, IPPROTO_xxx, usually 0, except for netlink */
+ int protocol;
+} SocketAddress;
+
+/* Selects how an IPv6 socket is bound. socket_address_bind_ipv6_only_or_bool_from_string() maps a boolean
+ * true to SOCKET_ADDRESS_IPV6_ONLY and false to SOCKET_ADDRESS_BOTH; the names suggest this controls
+ * whether the socket serves IPv4 too -- confirm in socket_address_listen(). */
+typedef enum SocketAddressBindIPv6Only {
+ SOCKET_ADDRESS_DEFAULT,
+ SOCKET_ADDRESS_BOTH,
+ SOCKET_ADDRESS_IPV6_ONLY,
+ _SOCKET_ADDRESS_BIND_IPV6_ONLY_MAX,
+ _SOCKET_ADDRESS_BIND_IPV6_ONLY_INVALID = -1
+} SocketAddressBindIPv6Only;
+
+#define socket_address_family(a) ((a)->sockaddr.sa.sa_family)
+
+const char* socket_address_type_to_string(int t) _const_;
+int socket_address_type_from_string(const char *s) _pure_;
+
+int socket_address_parse(SocketAddress *a, const char *s);
+int socket_address_parse_and_warn(SocketAddress *a, const char *s);
+int socket_address_parse_netlink(SocketAddress *a, const char *s);
+int socket_address_print(const SocketAddress *a, char **p);
+int socket_address_verify(const SocketAddress *a, bool strict) _pure_;
+
+int sockaddr_un_unlink(const struct sockaddr_un *sa);
+
+/* Removes the file system node of an AF_UNIX socket address via sockaddr_un_unlink(). Addresses of any
+ * other family are left alone and 0 is returned. */
+static inline int socket_address_unlink(const SocketAddress *a) {
+        if (socket_address_family(a) != AF_UNIX)
+                return 0;
+
+        return sockaddr_un_unlink(&a->sockaddr.un);
+}
+
+bool socket_address_can_accept(const SocketAddress *a) _pure_;
+
+int socket_address_listen(
+ const SocketAddress *a,
+ int flags,
+ int backlog,
+ SocketAddressBindIPv6Only only,
+ const char *bind_to_device,
+ bool reuse_port,
+ bool free_bind,
+ bool transparent,
+ mode_t directory_mode,
+ mode_t socket_mode,
+ const char *label);
+int make_socket_fd(int log_level, const char* address, int type, int flags);
+
+bool socket_address_is(const SocketAddress *a, const char *s, int type);
+bool socket_address_is_netlink(const SocketAddress *a, const char *s);
+
+bool socket_address_matches_fd(const SocketAddress *a, int fd);
+
+bool socket_address_equal(const SocketAddress *a, const SocketAddress *b) _pure_;
+
+const char* socket_address_get_path(const SocketAddress *a);
+
+bool socket_ipv6_is_supported(void);
+
+int sockaddr_port(const struct sockaddr *_sa, unsigned *port);
+
+int sockaddr_pretty(const struct sockaddr *_sa, socklen_t salen, bool translate_ipv6, bool include_port, char **ret);
+int getpeername_pretty(int fd, bool include_port, char **ret);
+int getsockname_pretty(int fd, char **ret);
+
+int socknameinfo_pretty(union sockaddr_union *sa, socklen_t salen, char **_ret);
+
+const char* socket_address_bind_ipv6_only_to_string(SocketAddressBindIPv6Only b) _const_;
+SocketAddressBindIPv6Only socket_address_bind_ipv6_only_from_string(const char *s) _pure_;
+SocketAddressBindIPv6Only socket_address_bind_ipv6_only_or_bool_from_string(const char *s);
+
+int netlink_family_to_string_alloc(int b, char **s);
+int netlink_family_from_string(const char *s) _pure_;
+
+bool sockaddr_equal(const union sockaddr_union *a, const union sockaddr_union *b);
+
+int fd_inc_sndbuf(int fd, size_t n);
+int fd_inc_rcvbuf(int fd, size_t n);
+
+int ip_tos_to_string_alloc(int i, char **s);
+int ip_tos_from_string(const char *s);
+
+bool ifname_valid(const char *p);
+bool address_label_valid(const char *p);
+
+int getpeercred(int fd, struct ucred *ucred);
+int getpeersec(int fd, char **ret);
+int getpeergroups(int fd, gid_t **ret);
+
+ssize_t send_one_fd_iov_sa(
+ int transport_fd,
+ int fd,
+ struct iovec *iov, size_t iovlen,
+ const struct sockaddr *sa, socklen_t len,
+ int flags);
+int send_one_fd_sa(int transport_fd,
+ int fd,
+ const struct sockaddr *sa, socklen_t len,
+ int flags);
+#define send_one_fd_iov(transport_fd, fd, iov, iovlen, flags) send_one_fd_iov_sa(transport_fd, fd, iov, iovlen, NULL, 0, flags)
+#define send_one_fd(transport_fd, fd, flags) send_one_fd_iov_sa(transport_fd, fd, NULL, 0, NULL, 0, flags)
+ssize_t receive_one_fd_iov(int transport_fd, struct iovec *iov, size_t iovlen, int flags, int *ret_fd);
+int receive_one_fd(int transport_fd, int flags);
+
+ssize_t next_datagram_size_fd(int fd);
+
+int flush_accept(int fd);
+
+/* Iterates 'cmsg' over all control messages of the struct msghdr that 'mh' points to. Note that 'mh' is
+ * evaluated on every iteration. */
+#define CMSG_FOREACH(cmsg, mh) \
+ for ((cmsg) = CMSG_FIRSTHDR(mh); (cmsg); (cmsg) = CMSG_NXTHDR((mh), (cmsg)))
+
+struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length);
+
+/*
+ * Certain hardware address types (e.g. Infiniband) do not fit into sll_addr
+ * (8 bytes) and run over the structure. This macro returns the correct size that
+ * must be passed to the kernel: the offset of sll_addr plus the largest of
+ * sizeof(sll_addr) and the address length of the hardware type, for the hardware
+ * types known here (Ethernet and Infiniband). 'sa' must be an lvalue of type
+ * struct sockaddr_ll with sll_family set to AF_PACKET.
+ */
+#define SOCKADDR_LL_LEN(sa) \
+ ({ \
+ const struct sockaddr_ll *_sa = &(sa); \
+ size_t _mac_len = sizeof(_sa->sll_addr); \
+ assert(_sa->sll_family == AF_PACKET); \
+ if (be16toh(_sa->sll_hatype) == ARPHRD_ETHER) \
+ _mac_len = MAX(_mac_len, (size_t) ETH_ALEN); \
+ if (be16toh(_sa->sll_hatype) == ARPHRD_INFINIBAND) \
+ _mac_len = MAX(_mac_len, (size_t) INFINIBAND_ALEN); \
+ offsetof(struct sockaddr_ll, sll_addr) + _mac_len; \
+ })
+
+/* Covers only file system and abstract AF_UNIX socket addresses, but not unnamed socket addresses.
+ * For a file system address the result includes the trailing NUL byte; for an abstract address
+ * (sun_path[0] == 0) it covers the leading NUL plus the name up to, but not including, the next NUL.
+ * 'sa' must be an lvalue of type struct sockaddr_un with sun_family set to AF_UNIX. */
+#define SOCKADDR_UN_LEN(sa) \
+ ({ \
+ const struct sockaddr_un *_sa = &(sa); \
+ assert(_sa->sun_family == AF_UNIX); \
+ offsetof(struct sockaddr_un, sun_path) + \
+ (_sa->sun_path[0] == 0 ? \
+ 1 + strnlen(_sa->sun_path+1, sizeof(_sa->sun_path)-1) : \
+ strnlen(_sa->sun_path, sizeof(_sa->sun_path))+1); \
+ })
+
+int socket_ioctl_fd(void);
+
+int sockaddr_un_set_path(struct sockaddr_un *ret, const char *path);
+
+/* Sets an integer socket option. Returns 0 on success or a negative errno on failure. */
+static inline int setsockopt_int(int fd, int level, int optname, int value) {
+        return setsockopt(fd, level, optname, &value, sizeof(value)) < 0 ? -errno : 0;
+}
diff --git a/src/basic/sparse-endian.h b/src/basic/sparse-endian.h
new file mode 100644
index 0000000..9583dda
--- /dev/null
+++ b/src/basic/sparse-endian.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright (c) 2012 Josh Triplett <josh@joshtriplett.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#pragma once
+
+#include <byteswap.h>
+#include <endian.h>
+#include <stdint.h>
+
+/* The bitwise/force attributes are only emitted when __CHECKER__ is defined (NOTE(review): presumably by
+ * the sparse static checker, as the file name suggests); in regular builds both expand to nothing and the
+ * typedefs below are plain aliases of the unsigned integer types. */
+#ifdef __CHECKER__
+#define __sd_bitwise __attribute__((__bitwise__))
+#define __sd_force __attribute__((__force__))
+#else
+#define __sd_bitwise
+#define __sd_force
+#endif
+
+typedef uint16_t __sd_bitwise le16_t;
+typedef uint16_t __sd_bitwise be16_t;
+typedef uint32_t __sd_bitwise le32_t;
+typedef uint32_t __sd_bitwise be32_t;
+typedef uint64_t __sd_bitwise le64_t;
+typedef uint64_t __sd_bitwise be64_t;
+
+#undef htobe16
+#undef htole16
+#undef be16toh
+#undef le16toh
+#undef htobe32
+#undef htole32
+#undef be32toh
+#undef le32toh
+#undef htobe64
+#undef htole64
+#undef be64toh
+#undef le64toh
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define bswap_16_on_le(x) __bswap_16(x)
+#define bswap_32_on_le(x) __bswap_32(x)
+#define bswap_64_on_le(x) __bswap_64(x)
+#define bswap_16_on_be(x) (x)
+#define bswap_32_on_be(x) (x)
+#define bswap_64_on_be(x) (x)
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define bswap_16_on_le(x) (x)
+#define bswap_32_on_le(x) (x)
+#define bswap_64_on_le(x) (x)
+#define bswap_16_on_be(x) __bswap_16(x)
+#define bswap_32_on_be(x) __bswap_32(x)
+#define bswap_64_on_be(x) __bswap_64(x)
+#endif
+
+/* Typed replacements for the <endian.h> conversion macros that were #undef'ed above: host-to-LE/BE
+ * helpers return the le*_t/be*_t types, the reverse helpers take them. Conversions to and from little
+ * endian go through bswap_*_on_be(), those to and from big endian through bswap_*_on_le(). */
+static inline le16_t htole16(uint16_t value) { return (le16_t __sd_force) bswap_16_on_be(value); }
+static inline le32_t htole32(uint32_t value) { return (le32_t __sd_force) bswap_32_on_be(value); }
+static inline le64_t htole64(uint64_t value) { return (le64_t __sd_force) bswap_64_on_be(value); }
+
+static inline be16_t htobe16(uint16_t value) { return (be16_t __sd_force) bswap_16_on_le(value); }
+static inline be32_t htobe32(uint32_t value) { return (be32_t __sd_force) bswap_32_on_le(value); }
+static inline be64_t htobe64(uint64_t value) { return (be64_t __sd_force) bswap_64_on_le(value); }
+
+static inline uint16_t le16toh(le16_t value) { return bswap_16_on_be((uint16_t __sd_force)value); }
+static inline uint32_t le32toh(le32_t value) { return bswap_32_on_be((uint32_t __sd_force)value); }
+static inline uint64_t le64toh(le64_t value) { return bswap_64_on_be((uint64_t __sd_force)value); }
+
+static inline uint16_t be16toh(be16_t value) { return bswap_16_on_le((uint16_t __sd_force)value); }
+static inline uint32_t be32toh(be32_t value) { return bswap_32_on_le((uint32_t __sd_force)value); }
+static inline uint64_t be64toh(be64_t value) { return bswap_64_on_le((uint64_t __sd_force)value); }
+
+#undef __sd_bitwise
+#undef __sd_force
diff --git a/src/basic/special.h b/src/basic/special.h
new file mode 100644
index 0000000..379a3d7
--- /dev/null
+++ b/src/basic/special.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#define SPECIAL_DEFAULT_TARGET "default.target"
+
+/* Shutdown targets */
+#define SPECIAL_UMOUNT_TARGET "umount.target"
+/* This is not really intended to be started directly. This is
+ * mostly so that other targets (reboot/halt/poweroff) can depend on
+ * it to bring all services down that want to be brought down on
+ * system shutdown. */
+#define SPECIAL_SHUTDOWN_TARGET "shutdown.target"
+#define SPECIAL_HALT_TARGET "halt.target"
+#define SPECIAL_POWEROFF_TARGET "poweroff.target"
+#define SPECIAL_REBOOT_TARGET "reboot.target"
+#define SPECIAL_KEXEC_TARGET "kexec.target"
+#define SPECIAL_EXIT_TARGET "exit.target"
+#define SPECIAL_SUSPEND_TARGET "suspend.target"
+#define SPECIAL_HIBERNATE_TARGET "hibernate.target"
+#define SPECIAL_HYBRID_SLEEP_TARGET "hybrid-sleep.target"
+#define SPECIAL_SUSPEND_THEN_HIBERNATE_TARGET "suspend-then-hibernate.target"
+
+/* Special boot targets */
+#define SPECIAL_RESCUE_TARGET "rescue.target"
+#define SPECIAL_EMERGENCY_TARGET "emergency.target"
+#define SPECIAL_MULTI_USER_TARGET "multi-user.target"
+#define SPECIAL_GRAPHICAL_TARGET "graphical.target"
+
+/* Early boot targets */
+#define SPECIAL_SYSINIT_TARGET "sysinit.target"
+#define SPECIAL_SOCKETS_TARGET "sockets.target"
+#define SPECIAL_TIMERS_TARGET "timers.target"
+#define SPECIAL_PATHS_TARGET "paths.target"
+#define SPECIAL_LOCAL_FS_TARGET "local-fs.target"
+#define SPECIAL_LOCAL_FS_PRE_TARGET "local-fs-pre.target"
+#define SPECIAL_INITRD_FS_TARGET "initrd-fs.target"
+#define SPECIAL_INITRD_ROOT_DEVICE_TARGET "initrd-root-device.target"
+#define SPECIAL_INITRD_ROOT_FS_TARGET "initrd-root-fs.target"
+#define SPECIAL_REMOTE_FS_TARGET "remote-fs.target" /* LSB's $remote_fs */
+#define SPECIAL_REMOTE_FS_PRE_TARGET "remote-fs-pre.target"
+#define SPECIAL_SWAP_TARGET "swap.target"
+#define SPECIAL_NETWORK_ONLINE_TARGET "network-online.target"
+#define SPECIAL_TIME_SYNC_TARGET "time-sync.target" /* LSB's $time */
+#define SPECIAL_BASIC_TARGET "basic.target"
+
+/* LSB compatibility */
+#define SPECIAL_NETWORK_TARGET "network.target" /* LSB's $network */
+#define SPECIAL_NSS_LOOKUP_TARGET "nss-lookup.target" /* LSB's $named */
+#define SPECIAL_RPCBIND_TARGET "rpcbind.target" /* LSB's $portmap */
+
+/*
+ * Rules regarding adding further high level targets like the above:
+ *
+ * - Be conservative, only add more of these when we really need
+ * them. We need strong usecases for further additions.
+ *
+ * - When there can be multiple implementations running side-by-side,
+ * it needs to be a .target unit which can pull in all
+ * implementations.
+ *
+ * - If something can be implemented with socket activation, and
+ * without, it needs to be a .target unit, so that it can pull in
+ * the appropriate unit.
+ *
+ * - Otherwise, it should be a .service unit.
+ *
+ * - In some cases it is OK to have both a .service and a .target
+ * unit, i.e. if there can be multiple parallel implementations, but
+ * only one is the "system" one. Example: syslog.
+ *
+ * Or to put this in other words: .service symlinks can be used to
+ * arbitrate between multiple implementations if there can be only one
+ * of a kind. .target units can be used to support multiple
+ * implementations that can run side-by-side.
+ */
+
+/* Magic early boot services */
+#define SPECIAL_FSCK_SERVICE "systemd-fsck@.service"
+#define SPECIAL_QUOTACHECK_SERVICE "systemd-quotacheck.service"
+#define SPECIAL_QUOTAON_SERVICE "quotaon.service"
+#define SPECIAL_REMOUNT_FS_SERVICE "systemd-remount-fs.service"
+
+/* Services systemd relies on */
+#define SPECIAL_DBUS_SERVICE "dbus.service"
+#define SPECIAL_DBUS_SOCKET "dbus.socket"
+#define SPECIAL_JOURNALD_SOCKET "systemd-journald.socket"
+#define SPECIAL_JOURNALD_SERVICE "systemd-journald.service"
+#define SPECIAL_TMPFILES_SETUP_SERVICE "systemd-tmpfiles-setup.service"
+
+/* Magic init signals */
+#define SPECIAL_KBREQUEST_TARGET "kbrequest.target"
+#define SPECIAL_SIGPWR_TARGET "sigpwr.target"
+#define SPECIAL_CTRL_ALT_DEL_TARGET "ctrl-alt-del.target"
+
+/* Where we add all our system units, users and machines by default */
+#define SPECIAL_SYSTEM_SLICE "system.slice"
+#define SPECIAL_USER_SLICE "user.slice"
+#define SPECIAL_MACHINE_SLICE "machine.slice"
+#define SPECIAL_ROOT_SLICE "-.slice"
+
+/* The scope unit systemd itself lives in. */
+#define SPECIAL_INIT_SCOPE "init.scope"
+
+/* The root directory. */
+#define SPECIAL_ROOT_MOUNT "-.mount"
diff --git a/src/basic/stat-util.c b/src/basic/stat-util.c
new file mode 100644
index 0000000..ea2bbc3
--- /dev/null
+++ b/src/basic/stat-util.c
@@ -0,0 +1,427 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/magic.h>
+#include <sched.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+
+int is_symlink(const char *path) {
+ struct stat info;
+
+ assert(path);
+
+ if (lstat(path, &info) < 0)
+ return -errno;
+
+ return !!S_ISLNK(info.st_mode);
+}
+
+int is_dir(const char* path, bool follow) {
+ struct stat st;
+ int r;
+
+ assert(path);
+
+ if (follow)
+ r = stat(path, &st);
+ else
+ r = lstat(path, &st);
+ if (r < 0)
+ return -errno;
+
+ return !!S_ISDIR(st.st_mode);
+}
+
+int is_dir_fd(int fd) {
+ struct stat st;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ return !!S_ISDIR(st.st_mode);
+}
+
+int is_device_node(const char *path) {
+ struct stat info;
+
+ assert(path);
+
+ if (lstat(path, &info) < 0)
+ return -errno;
+
+ return !!(S_ISBLK(info.st_mode) || S_ISCHR(info.st_mode));
+}
+
+int dir_is_empty_at(int dir_fd, const char *path) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ /* Returns 1 if FOREACH_DIRENT() yields no entry, 0 if it yields one, -errno on failure. path may be NULL. */
+ if (path)
+ fd = openat(dir_fd, path, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ else
+ fd = fcntl(dir_fd, F_DUPFD_CLOEXEC, 3); /* check dir_fd itself: dup it, not the still-unset 'fd' */
+ if (fd < 0)
+ return -errno;
+
+ d = fdopendir(fd);
+ if (!d)
+ return -errno;
+ fd = -1; /* 'd' owns the descriptor from here on; reset fd so it is not closed separately */
+
+ FOREACH_DIRENT(de, d, return -errno)
+ return 0;
+
+ return 1;
+}
+
+bool null_or_empty(struct stat *st) {
+ assert(st);
+
+ if (S_ISREG(st->st_mode) && st->st_size <= 0)
+ return true;
+
+ /* We don't want to hardcode the major/minor of /dev/null,
+ * hence we do a simpler "is this a device node?" check. */
+
+ if (S_ISCHR(st->st_mode) || S_ISBLK(st->st_mode))
+ return true;
+
+ return false;
+}
+
+int null_or_empty_path(const char *fn) {
+ struct stat st;
+
+ assert(fn);
+
+ if (stat(fn, &st) < 0)
+ return -errno;
+
+ return null_or_empty(&st);
+}
+
+int null_or_empty_fd(int fd) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ return null_or_empty(&st);
+}
+
+int path_is_read_only_fs(const char *path) {
+ struct statvfs st;
+
+ assert(path);
+
+ if (statvfs(path, &st) < 0)
+ return -errno;
+
+ if (st.f_flag & ST_RDONLY)
+ return true;
+
+ /* On NFS, statvfs() might not reflect whether we can actually
+ * write to the remote share. Let's try again with
+ * access(W_OK) which is more reliable, at least sometimes. */
+ if (access(path, W_OK) < 0 && errno == EROFS)
+ return true;
+
+ return false;
+}
+
+int files_same(const char *filea, const char *fileb, int flags) {
+ struct stat a, b;
+
+ assert(filea);
+ assert(fileb);
+
+ if (fstatat(AT_FDCWD, filea, &a, flags) < 0)
+ return -errno;
+
+ if (fstatat(AT_FDCWD, fileb, &b, flags) < 0)
+ return -errno;
+
+ return a.st_dev == b.st_dev &&
+ a.st_ino == b.st_ino;
+}
+
+bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) {
+ assert(s);
+ assert_cc(sizeof(statfs_f_type_t) >= sizeof(s->f_type));
+
+ return F_TYPE_EQUAL(s->f_type, magic_value);
+}
+
+int fd_is_fs_type(int fd, statfs_f_type_t magic_value) {
+ struct statfs s;
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ return is_fs_type(&s, magic_value);
+}
+
+int path_is_fs_type(const char *path, statfs_f_type_t magic_value) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH);
+ if (fd < 0)
+ return -errno;
+
+ return fd_is_fs_type(fd, magic_value);
+}
+
+bool is_temporary_fs(const struct statfs *s) {
+ return is_fs_type(s, TMPFS_MAGIC) ||
+ is_fs_type(s, RAMFS_MAGIC);
+}
+
+bool is_network_fs(const struct statfs *s) {
+ return is_fs_type(s, CIFS_MAGIC_NUMBER) ||
+ is_fs_type(s, CODA_SUPER_MAGIC) ||
+ is_fs_type(s, NCP_SUPER_MAGIC) ||
+ is_fs_type(s, NFS_SUPER_MAGIC) ||
+ is_fs_type(s, SMB_SUPER_MAGIC) ||
+ is_fs_type(s, V9FS_MAGIC) ||
+ is_fs_type(s, AFS_SUPER_MAGIC) ||
+ is_fs_type(s, OCFS2_SUPER_MAGIC);
+}
+
+int fd_is_temporary_fs(int fd) {
+ struct statfs s;
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ return is_temporary_fs(&s);
+}
+
+int fd_is_network_fs(int fd) {
+ struct statfs s;
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ return is_network_fs(&s);
+}
+
+int fd_is_network_ns(int fd) {
+ struct statfs s;
+ int r;
+
+ /* Checks whether the specified file descriptor refers to a network namespace. On old kernels there's no nice
+ * way to detect that, hence on those we'll return a recognizable error (EUCLEAN), so that callers can handle
+ * this somewhat nicely.
+ *
+ * This function returns > 0 if the fd definitely refers to a network namespace, 0 if it definitely does not
+ * refer to a network namespace, -EUCLEAN if we can't determine, and other negative error codes on error. */
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ if (!is_fs_type(&s, NSFS_MAGIC)) {
+ /* On really old kernels, there was no "nsfs", and network namespace sockets belonged to procfs
+ * instead. Handle that in a somewhat smart way. */
+
+ if (is_fs_type(&s, PROC_SUPER_MAGIC)) {
+ struct statfs t;
+
+ /* OK, so it is procfs. Let's see if our own network namespace is procfs, too. If so, then the
+ * passed fd might refer to a network namespace, but we can't know for sure. In that case,
+ * return a recognizable error. */
+
+ if (statfs("/proc/self/ns/net", &t) < 0)
+ return -errno;
+
+ if (s.f_type == t.f_type)
+ return -EUCLEAN; /* It's possible, we simply don't know */
+ }
+
+ return 0; /* No! */
+ }
+
+ r = ioctl(fd, NS_GET_NSTYPE);
+ if (r < 0) {
+ if (errno == ENOTTY) /* Old kernels didn't know this ioctl, let's also return a recognizable error in that case */
+ return -EUCLEAN;
+
+ return -errno;
+ }
+
+ return r == CLONE_NEWNET;
+}
+
+int path_is_temporary_fs(const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH);
+ if (fd < 0)
+ return -errno;
+
+ return fd_is_temporary_fs(fd);
+}
+
+int stat_verify_regular(const struct stat *st) {
+ assert(st);
+
+ /* Checks whether the specified stat() structure refers to a regular file. If not returns an appropriate error
+ * code. */
+
+ if (S_ISDIR(st->st_mode))
+ return -EISDIR;
+
+ if (S_ISLNK(st->st_mode))
+ return -ELOOP;
+
+ if (!S_ISREG(st->st_mode))
+ return -EBADFD;
+
+ return 0;
+}
+
+int fd_verify_regular(int fd) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ return stat_verify_regular(&st);
+}
+
+int stat_verify_directory(const struct stat *st) {
+ assert(st);
+
+ if (S_ISLNK(st->st_mode))
+ return -ELOOP;
+
+ if (!S_ISDIR(st->st_mode))
+ return -ENOTDIR;
+
+ return 0;
+}
+
+int fd_verify_directory(int fd) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ return stat_verify_directory(&st);
+}
+
+int device_path_make_major_minor(mode_t mode, dev_t devno, char **ret) {
+ const char *t;
+
+ /* Generates the /dev/{char|block}/MAJOR:MINOR path for a dev_t */
+
+ if (S_ISCHR(mode))
+ t = "char";
+ else if (S_ISBLK(mode))
+ t = "block";
+ else
+ return -ENODEV;
+
+ if (asprintf(ret, "/dev/%s/%u:%u", t, major(devno), minor(devno)) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int device_path_make_canonical(mode_t mode, dev_t devno, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ /* Finds the canonical path for a device, i.e. resolves the /dev/{char|block}/MAJOR:MINOR path to the end. */
+
+ assert(ret);
+
+ if (major(devno) == 0 && minor(devno) == 0) {
+ char *s;
+
+ /* A special hack to make sure our 'inaccessible' device nodes work. They won't have symlinks in
+ * /dev/block/ and /dev/char/, hence we handle them specially here. */
+
+ if (S_ISCHR(mode))
+ s = strdup("/run/systemd/inaccessible/chr");
+ else if (S_ISBLK(mode))
+ s = strdup("/run/systemd/inaccessible/blk");
+ else
+ return -ENODEV;
+
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+ }
+
+ r = device_path_make_major_minor(mode, devno, &p);
+ if (r < 0)
+ return r;
+
+ return chase_symlinks(p, NULL, 0, ret);
+}
+
+int device_path_parse_major_minor(const char *path, mode_t *ret_mode, dev_t *ret_devno) {
+ mode_t mode;
+ dev_t devno;
+ int r;
+
+ /* Tries to extract the major/minor directly from the device path if we can. Handles /dev/block/ and /dev/char/
+ * paths, as well as our synthetic inaccessible device nodes. Never goes to disk. Returns -ENODEV if the device
+ * path cannot be parsed like this. */
+
+ if (path_equal(path, "/run/systemd/inaccessible/chr")) {
+ mode = S_IFCHR;
+ devno = makedev(0, 0);
+ } else if (path_equal(path, "/run/systemd/inaccessible/blk")) {
+ mode = S_IFBLK;
+ devno = makedev(0, 0);
+ } else {
+ const char *w;
+
+ w = path_startswith(path, "/dev/block/");
+ if (w)
+ mode = S_IFBLK;
+ else {
+ w = path_startswith(path, "/dev/char/");
+ if (!w)
+ return -ENODEV;
+
+ mode = S_IFCHR;
+ }
+
+ r = parse_dev(w, &devno);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_mode)
+ *ret_mode = mode;
+ if (ret_devno)
+ *ret_devno = devno;
+
+ return 0;
+}
diff --git a/src/basic/stat-util.h b/src/basic/stat-util.h
new file mode 100644
index 0000000..74fb725
--- /dev/null
+++ b/src/basic/stat-util.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+
+#include "macro.h"
+
+int is_symlink(const char *path);
+int is_dir(const char *path, bool follow);
+int is_dir_fd(int fd);
+int is_device_node(const char *path);
+
+int dir_is_empty_at(int dir_fd, const char *path);
+static inline int dir_is_empty(const char *path) {
+ return dir_is_empty_at(AT_FDCWD, path);
+}
+
+static inline int dir_is_populated(const char *path) {
+ int r;
+ r = dir_is_empty(path);
+ if (r < 0)
+ return r;
+ return !r;
+}
+
+bool null_or_empty(struct stat *st) _pure_;
+int null_or_empty_path(const char *fn);
+int null_or_empty_fd(int fd);
+
+int path_is_read_only_fs(const char *path);
+
+int files_same(const char *filea, const char *fileb, int flags);
+
+/* The .f_type field of struct statfs is really weirdly defined on
+ * different archs. Let's give its type a name. */
+typedef typeof(((struct statfs*)NULL)->f_type) statfs_f_type_t;
+
+bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) _pure_;
+int fd_is_fs_type(int fd, statfs_f_type_t magic_value);
+int path_is_fs_type(const char *path, statfs_f_type_t magic_value);
+
+bool is_temporary_fs(const struct statfs *s) _pure_;
+bool is_network_fs(const struct statfs *s) _pure_;
+
+int fd_is_temporary_fs(int fd);
+int fd_is_network_fs(int fd);
+
+int fd_is_network_ns(int fd);
+
+int path_is_temporary_fs(const char *path);
+
+/* Because statfs.f_type can be int on some architectures, we have to cast
+ * the const magic to the type, otherwise the compiler warns about
+ * signed/unsigned comparison, because the magic can be 32 bit unsigned.
+ */
+#define F_TYPE_EQUAL(a, b) ((a) == (typeof(a)) (b))
+
+int stat_verify_regular(const struct stat *st);
+int fd_verify_regular(int fd);
+
+int stat_verify_directory(const struct stat *st);
+int fd_verify_directory(int fd);
+
+/* glibc and the Linux kernel have different ideas about the major/minor size. These calls will check whether the
+ * specified major is valid by the Linux kernel's standards, not by glibc's. Linux has 20bits of minor, and 12 bits of
+ * major space. See MINORBITS in linux/kdev_t.h in the kernel sources. (If you wonder why we define _y here, instead of
+ * comparing directly >= 0: it's to trick out -Wtype-limits, which would otherwise complain if the type is unsigned, as
+ * such a test would be pointless in such a case.) */
+
+#define DEVICE_MAJOR_VALID(x) \
+ ({ \
+ typeof(x) _x = (x), _y = 0; \
+ _x >= _y && _x < (UINT32_C(1) << 12); \
+ \
+ })
+
+#define DEVICE_MINOR_VALID(x) \
+ ({ \
+ typeof(x) _x = (x), _y = 0; \
+ _x >= _y && _x < (UINT32_C(1) << 20); \
+ })
+
+int device_path_make_major_minor(mode_t mode, dev_t devno, char **ret);
+int device_path_make_canonical(mode_t mode, dev_t devno, char **ret);
+int device_path_parse_major_minor(const char *path, mode_t *ret_mode, dev_t *ret_devno);
diff --git a/src/basic/static-destruct.h b/src/basic/static-destruct.h
new file mode 100644
index 0000000..443c0e8
--- /dev/null
+++ b/src/basic/static-destruct.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include "alloc-util.h"
+#include "macro.h"
+
+/* A framework for registering static variables that shall be freed on shutdown of a process. It's a bit like gcc's
+ * destructor attribute, but allows us to precisely schedule when we want to free the variables. This is supposed to
+ * feel a bit like the gcc cleanup attribute, but for static variables. Note that this does not work for static
+ * variables declared in .so's, as the list is private to the same linking unit. But maybe that's a good thing. */
+
+typedef struct StaticDestructor {
+ void *data;
+ free_func_t destroy;
+} StaticDestructor;
+
+#define STATIC_DESTRUCTOR_REGISTER(variable, func) \
+ _STATIC_DESTRUCTOR_REGISTER(UNIQ, variable, func)
+
+#define _STATIC_DESTRUCTOR_REGISTER(uq, variable, func) \
+ /* Type-safe destructor */ \
+ static void UNIQ_T(static_destructor_wrapper, uq)(void *p) { \
+ typeof(variable) *q = p; \
+ func(q); \
+ } \
+ /* The actual destructor structure we place in a special section to find it */ \
+ _section_("SYSTEMD_STATIC_DESTRUCT") \
+ /* We pick pointer alignment, since that is apparently what gcc does for static variables */ \
+ _alignptr_ \
+ /* Make sure this is not dropped from the image because not explicitly referenced */ \
+ _used_ \
+ /* Make sure that AddressSanitizer doesn't pad this variable: we want everything in this section packed next to each other so that we can enumerate it. */ \
+ _variable_no_sanitize_address_ \
+ static const StaticDestructor UNIQ_T(static_destructor_entry, uq) = { \
+ .data = &(variable), \
+ .destroy = UNIQ_T(static_destructor_wrapper, uq), \
+ }
+
+/* Beginning and end of our section listing the destructors. We define these as weak as we want this to work even if
+ * not a single destructor is defined, in which case the section will be missing. */
+extern const struct StaticDestructor _weak_ __start_SYSTEMD_STATIC_DESTRUCT[];
+extern const struct StaticDestructor _weak_ __stop_SYSTEMD_STATIC_DESTRUCT[];
+
+/* The function to destroy everything. (Note that this must be static inline, as it's key that it remains in the same
+ * linking unit as the variables we want to destroy.) */
+static inline void static_destruct(void) {
+ const StaticDestructor *d;
+
+ if (!__start_SYSTEMD_STATIC_DESTRUCT)
+ return;
+
+ d = ALIGN_TO_PTR(__start_SYSTEMD_STATIC_DESTRUCT, sizeof(void*));
+ while (d < __stop_SYSTEMD_STATIC_DESTRUCT) {
+ d->destroy(d->data);
+ d = ALIGN_TO_PTR(d + 1, sizeof(void*));
+ }
+}
diff --git a/src/basic/stdio-util.h b/src/basic/stdio-util.h
new file mode 100644
index 0000000..dc67b6e
--- /dev/null
+++ b/src/basic/stdio-util.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <printf.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "util.h"
+
+#define snprintf_ok(buf, len, fmt, ...) \
+ ((size_t) snprintf(buf, len, fmt, __VA_ARGS__) < (len))
+
+#define xsprintf(buf, fmt, ...) \
+ assert_message_se(snprintf_ok(buf, ELEMENTSOF(buf), fmt, __VA_ARGS__), "xsprintf: " #buf "[] must be big enough")
+
+#define VA_FORMAT_ADVANCE(format, ap) \
+do { \
+ int _argtypes[128]; \
+ size_t _i, _k; \
+ /* See https://github.com/google/sanitizers/issues/992 */ \
+ if (HAS_FEATURE_MEMORY_SANITIZER) \
+ zero(_argtypes); \
+ _k = parse_printf_format((format), ELEMENTSOF(_argtypes), _argtypes); \
+ assert(_k < ELEMENTSOF(_argtypes)); \
+ for (_i = 0; _i < _k; _i++) { \
+ if (_argtypes[_i] & PA_FLAG_PTR) { \
+ (void) va_arg(ap, void*); \
+ continue; \
+ } \
+ \
+ switch (_argtypes[_i]) { \
+ case PA_INT: \
+ case PA_INT|PA_FLAG_SHORT: \
+ case PA_CHAR: \
+ (void) va_arg(ap, int); \
+ break; \
+ case PA_INT|PA_FLAG_LONG: \
+ (void) va_arg(ap, long int); \
+ break; \
+ case PA_INT|PA_FLAG_LONG_LONG: \
+ (void) va_arg(ap, long long int); \
+ break; \
+ case PA_WCHAR: \
+ (void) va_arg(ap, wchar_t); \
+ break; \
+ case PA_WSTRING: \
+ case PA_STRING: \
+ case PA_POINTER: \
+ (void) va_arg(ap, void*); \
+ break; \
+ case PA_FLOAT: \
+ case PA_DOUBLE: \
+ (void) va_arg(ap, double); \
+ break; \
+ case PA_DOUBLE|PA_FLAG_LONG_DOUBLE: \
+ (void) va_arg(ap, long double); \
+ break; \
+ default: \
+ assert_not_reached("Unknown format string argument."); \
+ } \
+ } \
+} while (false)
diff --git a/src/basic/strbuf.c b/src/basic/strbuf.c
new file mode 100644
index 0000000..81f4f21
--- /dev/null
+++ b/src/basic/strbuf.c
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "strbuf.h"
+#include "util.h"
+
+/*
+ * Strbuf stores given strings in a single continuous allocated memory
+ * area. Identical strings are de-duplicated and return the same offset
+ * as the first string stored. If the tail of a string already exists
+ * in the buffer, the tail is returned.
+ *
+ * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
+ * information about the stored strings.
+ *
+ * Example of udev rules:
+ * $ ./udevadm test .
+ * ...
+ * read rules file: /usr/lib/udev/rules.d/99-systemd.rules
+ * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
+ * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
+ * ...
+ */
+
+struct strbuf *strbuf_new(void) {
+ struct strbuf *str;
+
+ str = new(struct strbuf, 1);
+ if (!str)
+ return NULL;
+ *str = (struct strbuf) {
+ .buf = new0(char, 1),
+ .root = new0(struct strbuf_node, 1),
+ .len = 1,
+ .nodes_count = 1,
+ };
+ if (!str->buf || !str->root) {
+ free(str->buf);
+ free(str->root);
+ return mfree(str);
+ }
+
+ return str;
+}
+
+static struct strbuf_node* strbuf_node_cleanup(struct strbuf_node *node) {
+ size_t i;
+
+ for (i = 0; i < node->children_count; i++)
+ strbuf_node_cleanup(node->children[i].child);
+ free(node->children);
+ return mfree(node);
+}
+
+/* clean up trie data, leave only the string buffer */
+void strbuf_complete(struct strbuf *str) {
+ if (!str)
+ return;
+ if (str->root)
+ str->root = strbuf_node_cleanup(str->root);
+}
+
+/* clean up everything */
+void strbuf_cleanup(struct strbuf *str) {
+ if (!str)
+ return;
+
+ strbuf_complete(str);
+ free(str->buf);
+ free(str);
+}
+
+static int strbuf_children_cmp(const struct strbuf_child_entry *n1,
+ const struct strbuf_child_entry *n2) {
+ return n1->c - n2->c;
+}
+
+static void bubbleinsert(struct strbuf_node *node,
+ uint8_t c,
+ struct strbuf_node *node_child) {
+
+ struct strbuf_child_entry new = {
+ .c = c,
+ .child = node_child,
+ };
+ int left = 0, right = node->children_count;
+
+ while (right > left) {
+ int middle = (right + left) / 2 ;
+ if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
+ left = middle + 1;
+ else
+ right = middle;
+ }
+
+ memmove(node->children + left + 1, node->children + left,
+ sizeof(struct strbuf_child_entry) * (node->children_count - left));
+ node->children[left] = new;
+
+ node->children_count++;
+}
+
+/* add string, return the index/offset into the buffer */
+ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
+ uint8_t c;
+ struct strbuf_node *node;
+ size_t depth;
+ char *buf_new;
+ struct strbuf_child_entry *child;
+ struct strbuf_node *node_child;
+ ssize_t off;
+
+ if (!str->root)
+ return -EINVAL;
+
+ /* search string; start from last character to find possibly matching tails */
+
+ str->in_count++;
+ if (len == 0) {
+ str->dedup_count++;
+ return 0;
+ }
+ str->in_len += len;
+
+ node = str->root;
+ for (depth = 0; depth <= len; depth++) {
+ struct strbuf_child_entry search;
+
+ /* match against current node */
+ off = node->value_off + node->value_len - len;
+ if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
+ str->dedup_len += len;
+ str->dedup_count++;
+ return off;
+ }
+
+ c = s[len - 1 - depth];
+
+ /* lookup child node */
+ search.c = c;
+ child = typesafe_bsearch(&search, node->children, node->children_count, strbuf_children_cmp);
+ if (!child)
+ break;
+ node = child->child;
+ }
+
+ /* add new string */
+ buf_new = realloc(str->buf, str->len + len+1);
+ if (!buf_new)
+ return -ENOMEM;
+ str->buf = buf_new;
+ off = str->len;
+ memcpy(str->buf + off, s, len);
+ str->len += len;
+ str->buf[str->len++] = '\0';
+
+ /* new node */
+ node_child = new(struct strbuf_node, 1);
+ if (!node_child)
+ return -ENOMEM;
+ *node_child = (struct strbuf_node) {
+ .value_off = off,
+ .value_len = len,
+ };
+
+ /* extend array, add new entry, sort for bisection */
+ child = reallocarray(node->children, node->children_count + 1, sizeof(struct strbuf_child_entry));
+ if (!child) {
+ free(node_child);
+ return -ENOMEM;
+ }
+
+ str->nodes_count++;
+
+ node->children = child;
+ bubbleinsert(node, c, node_child);
+
+ return off;
+}
diff --git a/src/basic/strbuf.h b/src/basic/strbuf.h
new file mode 100644
index 0000000..a36944a
--- /dev/null
+++ b/src/basic/strbuf.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+struct strbuf {
+ char *buf;
+ size_t len;
+ struct strbuf_node *root;
+
+ size_t nodes_count;
+ size_t in_count;
+ size_t in_len;
+ size_t dedup_len;
+ size_t dedup_count;
+};
+
+struct strbuf_node {
+ size_t value_off;
+ size_t value_len;
+
+ struct strbuf_child_entry *children;
+ uint8_t children_count;
+};
+
+struct strbuf_child_entry {
+ uint8_t c;
+ struct strbuf_node *child;
+};
+
+struct strbuf *strbuf_new(void);
+ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len);
+void strbuf_complete(struct strbuf *str);
+void strbuf_cleanup(struct strbuf *str);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct strbuf*, strbuf_cleanup);
diff --git a/src/basic/string-table.c b/src/basic/string-table.c
new file mode 100644
index 0000000..34931b0
--- /dev/null
+++ b/src/basic/string-table.c
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "string-table.h"
+#include "string-util.h"
+
+ssize_t string_table_lookup(const char * const *table, size_t len, const char *key) {
+ size_t i;
+
+ if (!key)
+ return -1;
+
+ for (i = 0; i < len; ++i)
+ if (streq_ptr(table[i], key))
+ return (ssize_t) i;
+
+ return -1;
+}
diff --git a/src/basic/string-table.h b/src/basic/string-table.h
new file mode 100644
index 0000000..228c12a
--- /dev/null
+++ b/src/basic/string-table.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#pragma once
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+ssize_t string_table_lookup(const char * const *table, size_t len, const char *key);
+
+/* For basic lookup tables with strictly enumerated entries */
+#define _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \
+ scope const char *name##_to_string(type i) { \
+ if (i < 0 || i >= (type) ELEMENTSOF(name##_table)) \
+ return NULL; \
+ return name##_table[i]; \
+ }
+
+#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope) \
+ scope type name##_from_string(const char *s) { \
+ return (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \
+ }
+
+#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(name,type,yes,scope) \
+ scope type name##_from_string(const char *s) { \
+ int b; \
+ if (!s) \
+ return -1; \
+ b = parse_boolean(s); \
+ if (b == 0) \
+ return (type) 0; \
+ else if (b > 0) \
+ return yes; \
+ return (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \
+ }
+
+#define _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,scope) \
+ scope int name##_to_string_alloc(type i, char **str) { \
+ char *s; \
+ if (i < 0 || i > max) \
+ return -ERANGE; \
+ if (i < (type) ELEMENTSOF(name##_table)) { \
+ s = strdup(name##_table[i]); \
+ if (!s) \
+ return -ENOMEM; \
+ } else { \
+ if (asprintf(&s, "%i", i) < 0) \
+ return -ENOMEM; \
+ } \
+ *str = s; \
+ return 0; \
+ }
+
+#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,scope) \
+ scope type name##_from_string(const char *s) { \
+ type i; \
+ unsigned u = 0; \
+ if (!s) \
+ return (type) -1; \
+ for (i = 0; i < (type) ELEMENTSOF(name##_table); i++) /* first try the names in the table */ \
+ if (streq_ptr(name##_table[i], s)) \
+ return i; \
+ if (safe_atou(s, &u) >= 0 && u <= (max)) /* otherwise accept a plain number no larger than max */ \
+ return (type) u; \
+ return (type) -1; \
+ }
+
+#define _DEFINE_STRING_TABLE_LOOKUP(name,type,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope)
+
+#define _DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(name,type,yes,scope)
+
+#define DEFINE_STRING_TABLE_LOOKUP(name,type) _DEFINE_STRING_TABLE_LOOKUP(name,type,)
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP(name,type) _DEFINE_STRING_TABLE_LOOKUP(name,type,static)
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(name,type) _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,static)
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(name,type) _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,static)
+
+#define DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes) _DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes,)
+
+/* For string conversions where numbers are also acceptable */
+#define DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(name,type,max) \
+ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,) \
+ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,)
+
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max) \
+ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,static)
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max) \
+ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,static)
+
+#define DUMP_STRING_TABLE(name,type,max) \
+ do { \
+ type _k; \
+ flockfile(stdout); \
+ for (_k = 0; _k < (max); _k++) { \
+ const char *_t; \
+ _t = name##_to_string(_k); \
+ if (!_t) \
+ continue; \
+ fputs_unlocked(_t, stdout); \
+ fputc_unlocked('\n', stdout); \
+ } \
+ funlockfile(stdout); \
+ } while(false)
diff --git a/src/basic/string-util.c b/src/basic/string-util.c
new file mode 100644
index 0000000..93917bc
--- /dev/null
+++ b/src/basic/string-util.c
@@ -0,0 +1,1099 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "gunicode.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "fileio.h"
+
+/* NULL-tolerant strcmp(): a NULL pointer orders before every non-NULL string, and two NULL pointers compare
+ * equal. Returns -1/0/1 when NULL is involved, otherwise whatever strcmp() returns. */
+int strcmp_ptr(const char *a, const char *b) {
+
+        if (!a)
+                return b ? -1 : 0;
+
+        if (!b)
+                return 1;
+
+        return strcmp(a, b);
+}
+
+/* Checks whether 's' ends in 'postfix'. On match returns a pointer to where the postfix begins inside 's'
+ * (for an empty postfix that is the terminating NUL), otherwise NULL. */
+char* endswith(const char *s, const char *postfix) {
+        size_t s_len, postfix_len;
+        const char *tail;
+
+        assert(s);
+        assert(postfix);
+
+        s_len = strlen(s);
+        postfix_len = strlen(postfix);
+
+        if (postfix_len > s_len)
+                return NULL;
+
+        tail = s + (s_len - postfix_len);
+
+        if (postfix_len > 0 && memcmp(tail, postfix, postfix_len) != 0)
+                return NULL;
+
+        return (char*) tail;
+}
+
+/* Like endswith(), but the comparison is done with strcasecmp(), i.e. ignoring case. */
+char* endswith_no_case(const char *s, const char *postfix) {
+        size_t s_len, postfix_len;
+        const char *tail;
+
+        assert(s);
+        assert(postfix);
+
+        s_len = strlen(s);
+        postfix_len = strlen(postfix);
+
+        if (postfix_len > s_len)
+                return NULL;
+
+        /* 'tail' has exactly the length of 'postfix', hence a plain NUL-terminated comparison is sufficient */
+        tail = s + (s_len - postfix_len);
+
+        if (postfix_len > 0 && strcasecmp(tail, postfix) != 0)
+                return NULL;
+
+        return (char*) tail;
+}
+
+/* Tests whether 's' begins with 'word' as a whole word, i.e. followed either by the end of the string or by
+ * whitespace. Returns a pointer to the terminating NUL, respectively to the first character after the run of
+ * whitespace; returns NULL if there is no such match. An empty 'word' matches right at 's'. */
+char* first_word(const char *s, const char *word) {
+        size_t word_len;
+        const char *after;
+
+        assert(s);
+        assert(word);
+
+        word_len = strlen(word);
+        if (word_len == 0)
+                return (char*) s;
+
+        /* strncmp() stops at the NUL of a too short 's', hence this also covers the length check */
+        if (strncmp(s, word, word_len) != 0)
+                return NULL;
+
+        after = s + word_len;
+        if (*after == 0)
+                return (char*) after;
+
+        if (!strchr(WHITESPACE, *after))
+                return NULL;
+
+        return (char*) after + strspn(after, WHITESPACE);
+}
+
+/* Like strcspn(), but honours backslash escapes: a character that directly follows a backslash never ends the
+ * span, even if it is listed in 'reject'. Returns the length of the span; if the string ends in a dangling
+ * backslash, that backslash is not counted. The index is a size_t so that strings longer than INT_MAX cannot
+ * overflow it. */
+static size_t strcspn_escaped(const char *s, const char *reject) {
+        bool escaped = false;
+        size_t n;
+
+        for (n = 0; s[n] != '\0'; n++) {
+                if (escaped)
+                        escaped = false;
+                else if (s[n] == '\\')
+                        escaped = true;
+                else if (strchr(reject, s[n]))
+                        break;
+        }
+
+        /* If s ends in \, return the index of the backslash itself. 'escaped' can only be set here when the
+         * loop consumed at least one character, hence n >= 1. */
+        return escaped ? n - 1 : n;
+}
+
+/* Split a string into words, one word per call.
+ *
+ * '*state' is the cursor: it must point into the NUL-terminated string to split and is advanced past the word
+ * that is returned. The return value points to the start of the next word (not NUL-terminated!) and its length
+ * is stored in '*l'. NULL is returned when no further word is available, or when parsing fails.
+ *
+ * Leading characters from 'separator' are skipped. With SPLIT_QUOTES a word that starts with ' or " extends to
+ * the matching unescaped quote, and backslash escapes are honoured when looking for the end of a word. With
+ * SPLIT_RELAX a missing closing quote or an unfinished escape is tolerated instead of being treated as failure. */
+const char* split(const char **state, size_t *l, const char *separator, SplitFlags flags) {
+ const char *current;
+
+ current = *state;
+
+ if (!*current) {
+ /* current was just copied from *state, so this merely restates the condition above */
+ assert(**state == '\0');
+ return NULL;
+ }
+
+ current += strspn(current, separator);
+ if (!*current) {
+ /* Only separators were left: park the cursor on the terminating NUL */
+ *state = current;
+ return NULL;
+ }
+
+ if (flags & SPLIT_QUOTES && strchr("\'\"", *current)) {
+ char quotechars[2] = {*current, '\0'};
+
+ /* Length of the quoted payload, i.e. everything after the opening quote up to the closing one */
+ *l = strcspn_escaped(current + 1, quotechars);
+ if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] ||
+ (current[*l + 2] && !strchr(separator, current[*l + 2]))) {
+ /* right quote missing or garbage at the end */
+ if (flags & SPLIT_RELAX) {
+ /* Return what we have; step over the closing quote only if there is one */
+ *state = current + *l + 1 + (current[*l + 1] != '\0');
+ return current + 1;
+ }
+ *state = current;
+ return NULL;
+ }
+ /* Cursor goes behind the closing quote, the returned word starts behind the opening quote */
+ *state = current++ + *l + 2;
+ } else if (flags & SPLIT_QUOTES) {
+ *l = strcspn_escaped(current, separator);
+ if (current[*l] && !strchr(separator, current[*l]) && !(flags & SPLIT_RELAX)) {
+ /* unfinished escape */
+ *state = current;
+ return NULL;
+ }
+ *state = current + *l;
+ } else {
+ /* Plain mode: no quoting, no escaping */
+ *l = strcspn(current, separator);
+ *state = current + *l;
+ }
+
+ return current;
+}
+
+/* Returns a newly allocated string consisting of 's' followed by at most 'b' bytes of 'suffix'. Either string
+ * may be NULL and is then treated as empty. Returns NULL on allocation failure or if the combined length does
+ * not fit into a size_t. The caller owns the result and must free() it.
+ *
+ * 'b' is an upper bound: if 'suffix' is shorter than 'b' only its actual length is used, matching what the
+ * strndup() based path for s == NULL does and making sure we never read beyond the end of 'suffix'. */
+char *strnappend(const char *s, const char *suffix, size_t b) {
+        size_t a;
+        char *r;
+
+        if (!s && !suffix)
+                return strdup("");
+
+        if (!s)
+                return strndup(suffix, b);
+
+        if (!suffix)
+                return strdup(s);
+
+        /* Clamp to the real length of the suffix, so that the memcpy() below cannot over-read */
+        b = strnlen(suffix, b);
+
+        a = strlen(s);
+        if (b >= ((size_t) -1) - a) /* a + b + 1 must not wrap around */
+                return NULL;
+
+        r = new(char, a+b+1);
+        if (!r)
+                return NULL;
+
+        memcpy(r, s, a);
+        memcpy(r+a, suffix, b);
+        r[a+b] = 0;
+
+        return r;
+}
+
+/* Concatenates 's' and the whole of 'suffix' into a newly allocated string; NULL arguments count as empty. */
+char *strappend(const char *s, const char *suffix) {
+        size_t suffix_len = suffix ? strlen(suffix) : 0;
+
+        return strnappend(s, suffix, suffix_len);
+}
+
+/* Concatenates 'x' and all following arguments, up to the first NULL, into a newly allocated string. If 'x'
+ * itself is NULL the result is an empty string. Returns NULL on allocation failure or if the total length would
+ * overflow a size_t. */
+char *strjoin_real(const char *x, ...) {
+        char *result, *cursor;
+        size_t total = 0;
+        const char *arg;
+        va_list ap;
+
+        /* First pass: sum up the lengths of all strings */
+        va_start(ap, x);
+
+        if (x) {
+                total = strlen(x);
+
+                while ((arg = va_arg(ap, const char *))) {
+                        size_t n = strlen(arg);
+
+                        if (n > ((size_t) -1) - total) {
+                                va_end(ap);
+                                return NULL;
+                        }
+
+                        total += n;
+                }
+        }
+
+        va_end(ap);
+
+        result = new(char, total+1);
+        if (!result)
+                return NULL;
+
+        if (!x) {
+                result[0] = 0;
+                return result;
+        }
+
+        /* Second pass: copy the strings one behind the other */
+        cursor = stpcpy(result, x);
+
+        va_start(ap, x);
+
+        while ((arg = va_arg(ap, const char *)))
+                cursor = stpcpy(cursor, arg);
+
+        va_end(ap);
+
+        return result;
+}
+
+/* Strips whitespace on both ends. Trailing whitespace is cut off by modifying the string in place, leading
+ * whitespace is skipped by returning a pointer to the first non-whitespace character. NULL yields NULL. */
+char *strstrip(char *s) {
+        char *start;
+
+        if (!s)
+                return NULL;
+
+        start = skip_leading_chars(s, WHITESPACE);
+
+        return delete_trailing_chars(start, WHITESPACE);
+}
+
+/* Removes every character listed in 'bad' from 's', wherever it occurs. Works in place and returns 's'. If
+ * 'bad' is NULL, WHITESPACE is used. NULL 's' yields NULL. */
+char *delete_chars(char *s, const char *bad) {
+        const char *src;
+        char *dst;
+
+        if (!s)
+                return NULL;
+
+        if (!bad)
+                bad = WHITESPACE;
+
+        /* Compact the string: 'dst' trails behind 'src' and only receives the characters we keep */
+        dst = s;
+        for (src = s; *src; src++)
+                if (!strchr(bad, *src))
+                        *(dst++) = *src;
+
+        *dst = 0;
+
+        return s;
+}
+
+/* Cuts off all characters listed in 'bad' from the end of 's'. Works in place and returns 's'. If 'bad' is
+ * NULL, WHITESPACE is used. NULL 's' yields NULL. */
+char *delete_trailing_chars(char *s, const char *bad) {
+        char *end;
+
+        if (!s)
+                return NULL;
+
+        if (!bad)
+                bad = WHITESPACE;
+
+        /* Walk backwards from the end for as long as we see unwanted characters */
+        end = s + strlen(s);
+        while (end > s && strchr(bad, end[-1]))
+                end--;
+
+        *end = 0;
+
+        return s;
+}
+
+/* Truncates 's' in place at the first character contained in NEWLINE and returns 's'. */
+char *truncate_nl(char *s) {
+        size_t first_line_len;
+
+        assert(s);
+
+        first_line_len = strcspn(s, NEWLINE);
+        s[first_line_len] = 0;
+
+        return s;
+}
+
+/* Locale-independent tolower(): maps 'A'..'Z' to 'a'..'z' and returns everything else unchanged. */
+char ascii_tolower(char x) {
+        return (x >= 'A' && x <= 'Z') ? x - 'A' + 'a' : x;
+}
+
+/* Locale-independent toupper(): maps 'a'..'z' to 'A'..'Z' and returns everything else unchanged. */
+char ascii_toupper(char x) {
+        return (x >= 'a' && x <= 'z') ? x - 'a' + 'A' : x;
+}
+
+/* Converts the NUL-terminated string 't' to lower case in place, using ascii_tolower(). Returns 't'. */
+char *ascii_strlower(char *t) {
+        size_t i;
+
+        assert(t);
+
+        for (i = 0; t[i]; i++)
+                t[i] = ascii_tolower(t[i]);
+
+        return t;
+}
+
+/* Converts the NUL-terminated string 't' to upper case in place, using ascii_toupper(). Returns 't'. */
+char *ascii_strupper(char *t) {
+        size_t i;
+
+        assert(t);
+
+        for (i = 0; t[i]; i++)
+                t[i] = ascii_toupper(t[i]);
+
+        return t;
+}
+
+/* Converts exactly the first 'n' bytes of 't' to lower case in place, using ascii_tolower(). NUL bytes are not
+ * treated specially. With n == 0 the buffer is not touched at all. Returns 't'. */
+char *ascii_strlower_n(char *t, size_t n) {
+        char *p = t;
+
+        while (n > 0) {
+                *p = ascii_tolower(*p);
+                p++;
+                n--;
+        }
+
+        return t;
+}
+
+/* Compares the first 'n' bytes of 'a' and 'b', ignoring ASCII case. Bytes are compared as unsigned values.
+ * Returns the difference of the first pair of bytes that differs, or 0 if all 'n' bytes match. NUL bytes do
+ * not end the comparison. */
+int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
+        size_t i;
+
+        for (i = 0; i < n; i++) {
+                int x = (int) (uint8_t) ascii_tolower(a[i]);
+                int y = (int) (uint8_t) ascii_tolower(b[i]);
+
+                if (x != y)
+                        return x - y;
+        }
+
+        return 0;
+}
+
+/* Compares two buffers of possibly different lengths, ignoring ASCII case. The common prefix decides first;
+ * if it is equal, the result of comparing the two lengths with CMP() is returned. */
+int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
+        int r = ascii_strcasecmp_n(a, b, MIN(n, m));
+
+        return r != 0 ? r : CMP(n, m);
+}
+
+/* Returns true if at least one character of 'a' also occurs in 'b'. */
+bool chars_intersect(const char *a, const char *b) {
+        return strpbrk(a, b) != NULL;
+}
+
+/* Returns true if 'p' contains a control character, i.e. a byte in the range 1..31 or the byte 127 (DEL).
+ * Characters listed in the optional string 'ok' are exempt from the check. */
+bool string_has_cc(const char *p, const char *ok) {
+        assert(p);
+
+        for (; *p; p++) {
+                char c = *p;
+
+                if (ok && strchr(ok, c))
+                        continue;
+
+                if ((c > 0 && c < ' ') || c == 127)
+                        return true;
+        }
+
+        return false;
+}
+
+/* Writes an ellipsis to 'buf' and returns the number of bytes written, which is always 3: the UTF-8 encoding
+ * of U+2026 if 'unicode' is set or the locale is UTF-8, three ASCII dots otherwise. No NUL byte is appended. */
+static int write_ellipsis(char *buf, bool unicode) {
+        static const char utf8_ellipsis[3] = { (char) 0xe2, (char) 0x80, (char) 0xa6 }; /* tri-dot ellipsis: … */
+        static const char ascii_ellipsis[3] = { '.', '.', '.' };
+
+        if (unicode || is_locale_utf8())
+                memcpy(buf, utf8_ellipsis, sizeof(utf8_ellipsis));
+        else
+                memcpy(buf, ascii_ellipsis, sizeof(ascii_ellipsis));
+
+        return 3;
+}
+
+/* Ellipsizes the first 'old_length' bytes of 's' to at most 'new_length' characters and returns the result as a
+ * newly allocated string (NULL on allocation failure). 'percent' (0..100) selects where the ellipsis goes: it is
+ * the share of the remaining space that is taken from the beginning of 's', the rest is taken from its end.
+ * If the input already fits it is simply duplicated. Called by ellipsize_mem() for inputs that passed
+ * ascii_is_valid_n(). */
+static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
+ size_t x, need_space, suffix_len;
+ char *t;
+
+ assert(s);
+ assert(percent <= 100);
+ assert(new_length != (size_t) -1);
+
+ if (old_length <= new_length)
+ return strndup(s, old_length);
+
+ /* Special case short ellipsations */
+ switch (new_length) {
+
+ case 0:
+ return strdup("");
+
+ case 1:
+ if (is_locale_utf8())
+ return strdup("…");
+ else
+ return strdup(".");
+
+ case 2:
+ if (!is_locale_utf8())
+ return strdup("..");
+
+ /* In UTF-8 mode two cells are enough for "…" plus one character: use the generic path below */
+ break;
+
+ default:
+ break;
+ }
+
+ /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
+ * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
+ * either for the UTF-8 encoded character or for three ASCII characters. */
+ need_space = is_locale_utf8() ? 1 : 3;
+
+ /* We write x + 3 + suffix_len + 1 = new_length - need_space + 4 bytes; as need_space >= 1 this fits */
+ t = new(char, new_length+3);
+ if (!t)
+ return NULL;
+
+ assert(new_length >= need_space);
+
+ /* Number of characters kept from the beginning, rounded to nearest */
+ x = ((new_length - need_space) * percent + 50) / 100;
+ assert(x <= new_length - need_space);
+
+ memcpy(t, s, x);
+ write_ellipsis(t + x, false);
+ /* Whatever space is left is filled from the end of the input */
+ suffix_len = new_length - x - need_space;
+ memcpy(t + x + 3, s + old_length - suffix_len, suffix_len);
+ *(t + x + 3 + suffix_len) = '\0';
+
+ return t;
+}
+
+/* Shortens the first 'old_length' bytes of 's' so that the result takes up at most 'new_length' cells, replacing
+ * the part that was cut out by an ellipsis. 'percent' (0..100) determines how much of the available space is
+ * used for the beginning of the string, the remainder shows its end. Returns a newly allocated string, or NULL
+ * on allocation failure or if a character cannot be decoded. new_length == (size_t) -1 means "no limit". */
+char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
+ size_t x, k, len, len2;
+ const char *i, *j;
+ char *e;
+ int r;
+
+ /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
+ * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
+ * strings.
+ *
+ * Ellipsation is done in a locale-dependent way:
+ * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
+ * 2. Otherwise, a unicode ellipsis is used ("…")
+ *
+ * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
+ * the current locale is UTF-8.
+ */
+
+ assert(s);
+ assert(percent <= 100);
+
+ if (new_length == (size_t) -1)
+ return strndup(s, old_length);
+
+ if (new_length == 0)
+ return strdup("");
+
+ /* If no multibyte characters use ascii_ellipsize_mem for speed */
+ if (ascii_is_valid_n(s, old_length))
+ return ascii_ellipsize_mem(s, old_length, new_length, percent);
+
+ /* Number of cells reserved for the beginning of the string; one cell is set aside for the ellipsis */
+ x = ((new_length - 1) * percent) / 100;
+ assert(x <= new_length - 1);
+
+ /* Advance 'i' from the start for as long as the characters still fit into x cells; 'k' counts used cells */
+ k = 0;
+ for (i = s; i < s + old_length; i = utf8_next_char(i)) {
+ char32_t c;
+ int w;
+
+ r = utf8_encoded_to_unichar(i, &c);
+ if (r < 0)
+ return NULL;
+
+ w = unichar_iswide(c) ? 2 : 1;
+ if (k + w <= x)
+ k += w;
+ else
+ break;
+ }
+
+ /* Now move 'j' backwards from the end, for as long as the total still fits into new_length cells */
+ for (j = s + old_length; j > i; ) {
+ char32_t c;
+ int w;
+ const char *jj;
+
+ jj = utf8_prev_char(j);
+ r = utf8_encoded_to_unichar(jj, &c);
+ if (r < 0)
+ return NULL;
+
+ w = unichar_iswide(c) ? 2 : 1;
+ if (k + w <= new_length) {
+ k += w;
+ j = jj;
+ } else
+ break;
+ }
+ assert(i <= j);
+
+ /* we don't actually need to ellipsize */
+ if (i == j)
+ return memdup_suffix0(s, old_length);
+
+ /* make space for ellipsis, if possible */
+ if (j < s + old_length)
+ j = utf8_next_char(j);
+ else if (i > s)
+ i = utf8_prev_char(i);
+
+ /* len: bytes kept from the start, len2: bytes kept from the end; 3 bytes ellipsis and a NUL on top */
+ len = i - s;
+ len2 = s + old_length - j;
+ e = new(char, len + 3 + len2 + 1);
+ if (!e)
+ return NULL;
+
+ /*
+ printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
+ old_length, new_length, x, len, len2, k);
+ */
+
+ memcpy(e, s, len);
+ write_ellipsis(e + len, true);
+ memcpy(e + len + 3, j, len2);
+ *(e + len + 3 + len2) = '\0';
+
+ return e;
+}
+
+char *cellescape(char *buf, size_t len, const char *s) {
+ /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII
+ * characters are copied as they are, everything else is escaped. The result
+ * is different than if escaping and ellipsization were performed in two
+ * separate steps, because each escape sequence is either stored in full or
+ * skipped.
+ *
+ * This function should be used for logging about strings which are expected
+ * to be plain ASCII, in a safe way.
+ *
+ * An ellipsis will be used if s is too long. It is always placed at the
+ * very end.
+ *
+ * Returns buf, which is always NUL-terminated. len must be at least 1.
+ */
+
+ size_t i = 0, last_char_width[4] = {}, k = 0, j;
+
+ assert(len > 0); /* at least a terminating NUL */
+
+ for (;;) {
+ char four[4];
+ int w;
+
+ if (*s == 0) /* terminating NUL detected? then we are done! */
+ goto done;
+
+ w = cescape_char(*s, four);
+ if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's
+ * ellipsize at the previous location */
+ break;
+
+ /* OK, there was space, let's add this escaped character to the buffer */
+ memcpy(buf + i, four, w);
+ i += w;
+
+ /* And remember its width in the ring buffer */
+ last_char_width[k] = w;
+ k = (k + 1) % 4;
+
+ s++;
+ }
+
+ /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4
+ * characters ideally, but if the buffer is shorter than that in the first place, take what we can get */
+ for (j = 0; j < ELEMENTSOF(last_char_width); j++) {
+
+ if (i + 4 <= len) /* nice, we reached our space goal */
+ break;
+
+ /* Step back to the most recently stored sequence (the ring buffer index wraps around) */
+ k = k == 0 ? 3 : k - 1;
+ if (last_char_width[k] == 0) /* bummer, we reached the beginning of the strings */
+ break;
+
+ assert(i >= last_char_width[k]);
+ i -= last_char_width[k];
+ }
+
+ if (i + 4 <= len) /* yay, enough space */
+ i += write_ellipsis(buf + i, false);
+ else if (i + 3 <= len) { /* only space for ".." */
+ buf[i++] = '.';
+ buf[i++] = '.';
+ } else if (i + 2 <= len) /* only space for a single "." */
+ buf[i++] = '.';
+ else
+ assert(i + 1 <= len);
+
+ done:
+ buf[i] = '\0';
+ return buf;
+}
+
+/* Returns true if one of the entries NULSTR_FOREACH() yields for 'nulstr' is identical to 'needle'. A NULL
+ * 'nulstr' contains nothing. */
+bool nulstr_contains(const char *nulstr, const char *needle) {
+        const char *entry;
+
+        if (!nulstr)
+                return false;
+
+        NULSTR_FOREACH(entry, nulstr)
+                if (strcmp(entry, needle) == 0)
+                        return true;
+
+        return false;
+}
+
+/* Truncates 's' in place to at most 'l' bytes. Strings that are already short enough are left untouched.
+ * Returns 's'. */
+char* strshorten(char *s, size_t l) {
+        assert(s);
+
+        /* strnlen() looks at no more than l+1 bytes, i.e. just enough to tell whether we need to cut */
+        if (strnlen(s, l+1) <= l)
+                return s;
+
+        s[l] = 0;
+        return s;
+}
+
+/* Returns a newly allocated copy of 'text' in which every occurrence of 'old_string' is replaced by
+ * 'new_string'. Occurrences are searched left to right and do not overlap; replaced text is not scanned again.
+ * Returns NULL if 'text' is NULL or if memory allocation fails.
+ *
+ * An empty 'old_string' matches nothing, so that a plain copy of 'text' is returned. Without this check the
+ * loop below would see a match at every position without ever advancing, and hence never terminate. */
+char *strreplace(const char *text, const char *old_string, const char *new_string) {
+        size_t l, old_len, new_len, allocated = 0;
+        char *t, *ret = NULL;
+        const char *f;
+
+        assert(old_string);
+        assert(new_string);
+
+        if (!text)
+                return NULL;
+
+        old_len = strlen(old_string);
+        new_len = strlen(new_string);
+
+        if (old_len == 0)
+                return strdup(text);
+
+        /* Start out with room for an unmodified copy, grow on demand whenever a replacement is made */
+        l = strlen(text);
+        if (!GREEDY_REALLOC(ret, allocated, l+1))
+                return NULL;
+
+        f = text;
+        t = ret;
+        while (*f) {
+                size_t d, nl;
+
+                if (!startswith(f, old_string)) {
+                        *(t++) = *(f++);
+                        continue;
+                }
+
+                /* Remember the write position as offset, since the buffer might move when it is enlarged */
+                d = t - ret;
+                nl = l - old_len + new_len;
+
+                if (!GREEDY_REALLOC(ret, allocated, nl + 1))
+                        return mfree(ret);
+
+                l = nl;
+                t = ret + d;
+
+                t = stpcpy(t, new_string);
+                f += old_len;
+        }
+
+        *t = 0;
+        return ret;
+}
+
+/* Helper for strip_tab_ansi(): for each of the two 'offsets' that lies beyond input position 'diff', adds
+ * 'size' to the matching 'shift' entry. Does nothing if 'offsets' is NULL. */
+static void advance_offsets(ssize_t diff, size_t offsets[static 2], size_t shift[static 2], size_t size) {
+        size_t position, k;
+
+        if (!offsets)
+                return;
+
+        position = (size_t) diff;
+
+        for (k = 0; k < 2; k++)
+                if (position < offsets[k])
+                        shift[k] += size;
+}
+
+/* Rewrites the buffer '*ibuf' into a new buffer with TABs expanded and ANSI color/CSO sequences removed.
+ *
+ * On success the old buffer is freed, '*ibuf' is pointed to the new one (which is also returned), and, if
+ * '_isz' is non-NULL, '*_isz' is updated to the new size. If '_isz' is NULL the input is measured with strlen().
+ * If 'highlight' is non-NULL its two offsets are adjusted by the amounts recorded via advance_offsets().
+ * On failure NULL is returned and '*ibuf', '*_isz' and 'highlight' are left untouched. */
+char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
+ const char *i, *begin = NULL;
+ enum {
+ STATE_OTHER,
+ STATE_ESCAPE,
+ STATE_CSI,
+ STATE_CSO,
+ } state = STATE_OTHER;
+ char *obuf = NULL;
+ size_t osz = 0, isz, shift[2] = {};
+ FILE *f;
+
+ assert(ibuf);
+ assert(*ibuf);
+
+ /* This does three things:
+ *
+ * 1. Replaces TABs by 8 spaces
+ * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences
+ * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences
+ *
+ * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as are any
+ * other special characters. Truncated ANSI sequences are left-as is too. This call is supposed to suppress the
+ * most basic formatting noise, but nothing else.
+ *
+ * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */
+
+ isz = _isz ? *_isz : strlen(*ibuf);
+
+ f = open_memstream(&obuf, &osz);
+ if (!f)
+ return NULL;
+
+ /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we created f here
+ * and it doesn't leave our scope. */
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ /* Note that the loop deliberately runs one step past the end of the input ("EOT"), so that every state gets
+ * the chance to flush a sequence that was cut short. *i is never dereferenced in that extra step. */
+ for (i = *ibuf; i < *ibuf + isz + 1; i++) {
+
+ switch (state) {
+
+ case STATE_OTHER:
+ if (i >= *ibuf + isz) /* EOT */
+ break;
+ else if (*i == '\x1B')
+ state = STATE_ESCAPE;
+ else if (*i == '\t') {
+ fputs(" ", f);
+ advance_offsets(i - *ibuf, highlight, shift, 7);
+ } else
+ fputc(*i, f);
+
+ break;
+
+ case STATE_ESCAPE:
+ if (i >= *ibuf + isz) { /* EOT */
+ fputc('\x1B', f);
+ advance_offsets(i - *ibuf, highlight, shift, 1);
+ break;
+ } else if (*i == '[') { /* ANSI CSI */
+ state = STATE_CSI;
+ begin = i + 1;
+ } else if (*i == ']') { /* ANSI CSO */
+ state = STATE_CSO;
+ begin = i + 1;
+ } else {
+ /* Not a sequence we handle: emit the ESC we held back, plus this character */
+ fputc('\x1B', f);
+ fputc(*i, f);
+ advance_offsets(i - *ibuf, highlight, shift, 1);
+ state = STATE_OTHER;
+ }
+
+ break;
+
+ case STATE_CSI:
+
+ if (i >= *ibuf + isz || /* EOT … */
+ !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */
+ /* Not a color sequence after all: emit the introducer we swallowed and rewind, so that
+ * the body of the sequence is copied verbatim in STATE_OTHER (the loop's i++ brings us
+ * back to 'begin') */
+ fputc('\x1B', f);
+ fputc('[', f);
+ advance_offsets(i - *ibuf, highlight, shift, 2);
+ state = STATE_OTHER;
+ i = begin-1;
+ } else if (*i == 'm')
+ state = STATE_OTHER;
+
+ break;
+
+ case STATE_CSO:
+
+ if (i >= *ibuf + isz || /* EOT … */
+ (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */
+ /* Same rewind logic as for CSI above */
+ fputc('\x1B', f);
+ fputc(']', f);
+ advance_offsets(i - *ibuf, highlight, shift, 2);
+ state = STATE_OTHER;
+ i = begin-1;
+ } else if (*i == '\a')
+ state = STATE_OTHER;
+
+ break;
+ }
+ }
+
+ if (fflush_and_check(f) < 0) {
+ fclose(f);
+ return mfree(obuf);
+ }
+
+ fclose(f);
+
+ /* Only now that the new buffer is complete, replace the caller's buffer */
+ free(*ibuf);
+ *ibuf = obuf;
+
+ if (_isz)
+ *_isz = osz;
+
+ if (highlight) {
+ highlight[0] += shift[0];
+ highlight[1] += shift[1];
+ }
+
+ return obuf;
+}
+
+/* Appends all strings of the NULL-terminated argument list to '*x', reallocating it as needed. 'separator'
+ * (which may be NULL) is inserted in front of every appended string, except in front of the first one if '*x'
+ * was NULL or empty. On success '*x' is updated and a pointer to the terminating NUL of the new string is
+ * returned. On failure NULL is returned and '*x' is left as it was. */
+char *strextend_with_separator(char **x, const char *separator, ...) {
+ bool need_separator;
+ size_t f, l, l_separator;
+ char *r, *p;
+ va_list ap;
+
+ assert(x);
+
+ /* f: length of the existing string, l: length of the final string */
+ l = f = strlen_ptr(*x);
+
+ need_separator = !isempty(*x);
+ l_separator = strlen_ptr(separator);
+
+ /* First pass: calculate the final length */
+ va_start(ap, separator);
+ for (;;) {
+ const char *t;
+ size_t n;
+
+ t = va_arg(ap, const char *);
+ if (!t)
+ break;
+
+ n = strlen(t);
+
+ if (need_separator)
+ n += l_separator;
+
+ if (n > ((size_t) -1) - l) {
+ va_end(ap);
+ return NULL;
+ }
+
+ l += n;
+ need_separator = true;
+ }
+ va_end(ap);
+
+ /* Reset for the second pass, which has to make the very same decisions as the first one */
+ need_separator = !isempty(*x);
+
+ r = realloc(*x, l+1);
+ if (!r)
+ return NULL;
+
+ p = r + f;
+
+ /* Second pass: append the strings */
+ va_start(ap, separator);
+ for (;;) {
+ const char *t;
+
+ t = va_arg(ap, const char *);
+ if (!t)
+ break;
+
+ if (need_separator && separator)
+ p = stpcpy(p, separator);
+
+ p = stpcpy(p, t);
+
+ need_separator = true;
+ }
+ va_end(ap);
+
+ assert(p == r + l);
+
+ *p = 0;
+ *x = r;
+
+ return r + l;
+}
+
+/* Returns a newly allocated string consisting of 'n' repetitions of 's'. n == 0 or an empty 's' yield an empty
+ * string. Returns NULL on allocation failure, or if the size of the result does not fit into a size_t. */
+char *strrep(const char *s, unsigned n) {
+        size_t l;
+        char *r, *p;
+        unsigned i;
+
+        assert(s);
+
+        l = strlen(s);
+
+        /* Refuse if l * n + 1 would wrap around: we would allocate too little and write beyond the buffer */
+        if (l > 0 && n > (((size_t) -1) - 1) / l)
+                return NULL;
+
+        p = r = malloc(l * n + 1);
+        if (!r)
+                return NULL;
+
+        for (i = 0; i < n; i++) {
+                memcpy(p, s, l);
+                p += l;
+        }
+
+        *p = 0;
+        return r;
+}
+
+/* Splits 's' at the first occurrence of 'sep'. On success returns 0 and stores newly allocated copies of the
+ * text before and after the separator in '*l' and '*r'. Returns -EINVAL if 'sep' is empty or does not occur in
+ * 's', and -ENOMEM on allocation failure; in both cases '*l' and '*r' are not touched. */
+int split_pair(const char *s, const char *sep, char **l, char **r) {
+        const char *hit;
+        char *left, *right;
+
+        assert(s);
+        assert(sep);
+        assert(l);
+        assert(r);
+
+        if (!sep || !sep[0])
+                return -EINVAL;
+
+        hit = strstr(s, sep);
+        if (!hit)
+                return -EINVAL;
+
+        left = strndup(s, hit - s);
+        if (!left)
+                return -ENOMEM;
+
+        right = strdup(hit + strlen(sep));
+        if (!right) {
+                free(left);
+                return -ENOMEM;
+        }
+
+        *l = left;
+        *r = right;
+
+        return 0;
+}
+
+/* Makes '*p' a copy of 's' (or NULL if 's' is NULL), freeing whatever '*p' pointed to before. Returns 0 if
+ * '*p' already equals 's' and nothing was changed, 1 if it was replaced, and -ENOMEM if the copy could not be
+ * allocated (in which case '*p' is left untouched). */
+int free_and_strdup(char **p, const char *s) {
+        char *copy = NULL;
+
+        assert(p);
+
+        if (streq_ptr(*p, s))
+                return 0;
+
+        if (s) {
+                copy = strdup(s);
+                if (!copy)
+                        return -ENOMEM;
+        }
+
+        free(*p);
+        *p = copy;
+
+        return 1;
+}
+
+/* Like free_and_strdup(), but copies at most 'l' bytes of 's'. Returns 0 if '*p' already holds exactly that
+ * string (or both are NULL), 1 if '*p' was replaced, and -ENOMEM on allocation failure. A NULL 's' is only
+ * permitted together with l == 0. */
+int free_and_strndup(char **p, const char *s, size_t l) {
+        char *copy;
+
+        assert(p);
+        assert(s || l == 0);
+
+        if (!s) {
+                if (!*p)
+                        return 0;
+
+                copy = NULL;
+        } else {
+                /* Unchanged if the first l bytes match and the old string doesn't continue beyond them */
+                if (*p && strncmp(*p, s, l) == 0 && (l > strlen(*p) || (*p)[l] == '\0'))
+                        return 0;
+
+                copy = strndup(s, l);
+                if (!copy)
+                        return -ENOMEM;
+        }
+
+        free(*p);
+        *p = copy;
+
+        return 1;
+}
+
+/* Fallback implementation, only built if HAVE_EXPLICIT_BZERO is not set; otherwise the inline wrapper from
+ * string-util.h is used. */
+#if !HAVE_EXPLICIT_BZERO
+/*
+ * Pointer to memset is volatile so that compiler must de-reference
+ * the pointer and can't assume that it points to any function in
+ * particular (such as memset, which it then might further "optimize")
+ * This approach is inspired by openssl's crypto/mem_clr.c.
+ */
+typedef void *(*memset_t)(void *,int,size_t);
+
+static volatile memset_t memset_func = memset;
+
+/* Overwrites 'l' bytes at 'p' with zeroes and returns 'p'. With l == 0 nothing is called, so 'p' is never
+ * passed on in that case. */
+void* explicit_bzero_safe(void *p, size_t l) {
+ if (l > 0)
+ memset_func(p, '\0', l);
+
+ return p;
+}
+#endif
+
+/* Overwrites the contents of the string 'x' (up to, but not including, the terminating NUL) with zeroes, using
+ * explicit_bzero_safe(). Meant for memory that held passphrases and the like, once they are not needed
+ * anymore. Returns 'x'; NULL is passed through. */
+char* string_erase(char *x) {
+        if (x)
+                explicit_bzero_safe(x, strlen(x));
+
+        return x;
+}
+
+/* Erases the string 's' with string_erase() and then frees it. NULL is fine. */
+char *string_free_erase(char *s) {
+        string_erase(s);
+
+        return mfree(s);
+}
+
+/* Returns true if 'p' is non-NULL and contains neither control characters (bytes 1..31), nor quotes, nor a
+ * backslash, nor DEL (0x7f). */
+bool string_is_safe(const char *p) {
+        if (!p)
+                return false;
+
+        for (; *p; p++) {
+                if (*p > 0 && *p < ' ') /* no control characters */
+                        return false;
+
+                if (strchr(QUOTES "\\\x7f", *p))
+                        return false;
+        }
+
+        return true;
+}
diff --git a/src/basic/string-util.h b/src/basic/string-util.h
new file mode 100644
index 0000000..38070ab
--- /dev/null
+++ b/src/basic/string-util.h
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <alloca.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+
+/* Character classes, expressed as string literals so that they can be passed to strchr(), strspn(), strcspn()
+ * and friends, and be concatenated at compile time. */
+
+/* What is interpreted as whitespace? */
+#define WHITESPACE " \t\n\r"
+#define NEWLINE "\n\r"
+#define QUOTES "\"\'"
+#define COMMENTS "#;"
+#define GLOB_CHARS "*?["
+#define DIGITS "0123456789"
+#define LOWERCASE_LETTERS "abcdefghijklmnopqrstuvwxyz"
+#define UPPERCASE_LETTERS "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+#define LETTERS LOWERCASE_LETTERS UPPERCASE_LETTERS
+#define ALPHANUMERICAL LETTERS DIGITS
+#define HEXDIGITS DIGITS "abcdefABCDEF"
+
+/* Equality shortcuts for strcmp() and friends. They evaluate to true if the strings are equal; the 'n'
+ * variants compare at most n bytes, the 'case' variants ignore case. None of them accepts NULL, see
+ * streq_ptr() for that. */
+#define streq(a,b) (strcmp((a),(b)) == 0)
+#define strneq(a, b, n) (strncmp((a), (b), (n)) == 0)
+#define strcaseeq(a,b) (strcasecmp((a),(b)) == 0)
+#define strncaseeq(a, b, n) (strncasecmp((a), (b), (n)) == 0)
+
+int strcmp_ptr(const char *a, const char *b) _pure_;
+
+/* Like streq(), but NULL is allowed: true if both strings are equal or both pointers are NULL. */
+static inline bool streq_ptr(const char *a, const char *b) {
+        int order = strcmp_ptr(a, b);
+
+        return order == 0;
+}
+
+/* Maps NULL to the empty string, passes everything else through. */
+static inline const char* strempty(const char *s) {
+        if (s)
+                return s;
+
+        return "";
+}
+
+/* Maps NULL to the literal string "(null)", passes everything else through. */
+static inline const char* strnull(const char *s) {
+        if (s)
+                return s;
+
+        return "(null)";
+}
+
+/* Maps NULL to the literal string "n/a", passes everything else through. */
+static inline const char *strna(const char *s) {
+        if (s)
+                return s;
+
+        return "n/a";
+}
+
+/* Returns true if 'p' is NULL or points to an empty string. */
+static inline bool isempty(const char *p) {
+        return p == NULL || *p == '\0';
+}
+
+/* Maps the empty string to NULL, passes everything else (including NULL) through. */
+static inline const char *empty_to_null(const char *p) {
+        if (isempty(p))
+                return NULL;
+
+        return p;
+}
+
+/* Maps NULL and the empty string to "-", passes everything else through. */
+static inline const char *empty_to_dash(const char *str) {
+        if (isempty(str))
+                return "-";
+
+        return str;
+}
+
+/* If 's' begins with 'prefix', returns a pointer to the first character of 's' after the prefix, otherwise
+ * NULL. Neither argument may be NULL. */
+static inline char *startswith(const char *s, const char *prefix) {
+        size_t prefix_len = strlen(prefix);
+
+        return strncmp(s, prefix, prefix_len) == 0 ? (char*) s + prefix_len : NULL;
+}
+
+/* Like startswith(), but the prefix is compared with strncasecmp(), i.e. ignoring case. */
+static inline char *startswith_no_case(const char *s, const char *prefix) {
+        size_t prefix_len = strlen(prefix);
+
+        return strncasecmp(s, prefix, prefix_len) == 0 ? (char*) s + prefix_len : NULL;
+}
+
+char *endswith(const char *s, const char *postfix) _pure_;
+char *endswith_no_case(const char *s, const char *postfix) _pure_;
+
+char *first_word(const char *s, const char *word) _pure_;
+
+/* Flags for split(), may be ORed together */
+typedef enum SplitFlags {
+ SPLIT_QUOTES = 0x01 << 0, /* honour '…' and "…" quoting as well as backslash escapes */
+ SPLIT_RELAX = 0x01 << 1, /* tolerate unterminated quotes and unfinished escapes instead of failing */
+} SplitFlags;
+
+const char* split(const char **state, size_t *l, const char *separator, SplitFlags flags);
+
+/* Iterates through the whitespace separated words of 's'. On each iteration 'word' points to the start of a
+ * word (which is not NUL-terminated) and 'length' holds its length; 'state' is the cursor variable handed to
+ * split() and must not be modified by the loop body. */
+#define FOREACH_WORD(word, length, s, state) \
+ _FOREACH_WORD(word, length, s, WHITESPACE, 0, state)
+
+/* Same, but with a caller supplied set of separator characters */
+#define FOREACH_WORD_SEPARATOR(word, length, s, separator, state) \
+ _FOREACH_WORD(word, length, s, separator, 0, state)
+
+/* Generic form: a plain for() loop around split(), which ends as soon as split() returns NULL */
+#define _FOREACH_WORD(word, length, s, separator, flags, state) \
+ for ((state) = (s), (word) = split(&(state), &(length), (separator), (flags)); (word); (word) = split(&(state), &(length), (separator), (flags)))
+
+char *strappend(const char *s, const char *suffix);
+char *strnappend(const char *s, const char *suffix, size_t length);
+
+/* Concatenates all arguments into a newly allocated string. strjoin() appends the NULL sentinel that
+ * strjoin_real() requires, so callers don't have to. */
+char *strjoin_real(const char *x, ...) _sentinel_;
+#define strjoin(a, ...) strjoin_real((a), __VA_ARGS__, NULL)
+
+/* Like strjoin(), but the result is allocated with newa() rather than on the heap, hence it must not be freed.
+ * The length is summed up in a first loop and the strings are copied in a second one. Both loops stop at the
+ * first NULL argument, so everything behind a NULL argument is ignored. */
+#define strjoina(a, ...) \
+ ({ \
+ const char *_appendees_[] = { a, __VA_ARGS__ }; \
+ char *_d_, *_p_; \
+ size_t _len_ = 0; \
+ size_t _i_; \
+ for (_i_ = 0; _i_ < ELEMENTSOF(_appendees_) && _appendees_[_i_]; _i_++) \
+ _len_ += strlen(_appendees_[_i_]); \
+ _p_ = _d_ = newa(char, _len_ + 1); \
+ for (_i_ = 0; _i_ < ELEMENTSOF(_appendees_) && _appendees_[_i_]; _i_++) \
+ _p_ = stpcpy(_p_, _appendees_[_i_]); \
+ *_p_ = 0; \
+ _d_; \
+ })
+
+char *strstrip(char *s);
+char *delete_chars(char *s, const char *bad);
+char *delete_trailing_chars(char *s, const char *bad);
+char *truncate_nl(char *s);
+
+/* Returns a pointer to the first character of 's' that is not listed in 'bad' (which may be the terminating
+ * NUL). The string itself is not modified. If 'bad' is NULL, WHITESPACE is used. NULL 's' yields NULL. */
+static inline char *skip_leading_chars(const char *s, const char *bad) {
+        size_t skip;
+
+        if (!s)
+                return NULL;
+
+        skip = strspn(s, bad ? bad : WHITESPACE);
+
+        return (char*) s + skip;
+}
+
+char ascii_tolower(char x);
+char *ascii_strlower(char *s);
+char *ascii_strlower_n(char *s, size_t n);
+
+char ascii_toupper(char x);
+char *ascii_strupper(char *s);
+
+int ascii_strcasecmp_n(const char *a, const char *b, size_t n);
+int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m);
+
+bool chars_intersect(const char *a, const char *b) _pure_;
+
+/* Returns true if 's' consists exclusively of characters listed in 'charset'. The empty string qualifies. */
+static inline bool _pure_ in_charset(const char *s, const char* charset) {
+        size_t accepted;
+
+        assert(s);
+        assert(charset);
+
+        accepted = strspn(s, charset);
+
+        return s[accepted] == '\0';
+}
+
+bool string_has_cc(const char *p, const char *ok) _pure_;
+
+char *ellipsize_mem(const char *s, size_t old_length_bytes, size_t new_length_columns, unsigned percent);
+/* Convenience wrapper around ellipsize_mem() for NUL-terminated strings: the byte length is taken from
+ * strlen(s), 'length' is passed on as the new length. */
+static inline char *ellipsize(const char *s, size_t length, unsigned percent) {
+        size_t old_length_bytes = strlen(s);
+
+        return ellipsize_mem(s, old_length_bytes, length, percent);
+}
+
+char *cellescape(char *buf, size_t len, const char *s);
+
+/* This limit is arbitrary, enough to give some idea what the string contains */
+#define CELLESCAPE_DEFAULT_LENGTH 64
+
+bool nulstr_contains(const char *nulstr, const char *needle);
+
+char* strshorten(char *s, size_t l);
+
+char *strreplace(const char *text, const char *old_string, const char *new_string);
+
+char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]);
+
+char *strextend_with_separator(char **x, const char *separator, ...) _sentinel_;
+
+#define strextend(x, ...) strextend_with_separator(x, NULL, __VA_ARGS__)
+
+char *strrep(const char *s, unsigned n);
+
+int split_pair(const char *s, const char *sep, char **l, char **r);
+
+int free_and_strdup(char **p, const char *s);
+int free_and_strndup(char **p, const char *s, size_t l);
+
+/* memmem() wrapper that tolerates a NULL haystack as long as it is not actually needed, which is convenient
+ * for zero-length buffers: an empty needle always matches at 'haystack' (NULL or not), and a needle that is
+ * longer than the haystack never matches. */
+static inline void *memmem_safe(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen) {
+
+        if (needlelen == 0)
+                return (void*) haystack;
+
+        if (needlelen > haystacklen)
+                return NULL;
+
+        assert(haystack);
+        assert(needle);
+
+        return memmem(haystack, haystacklen, needle, needlelen);
+}
+
+/* Zeroes 'l' bytes at 'p' and returns 'p'. If the C library provides explicit_bzero() this is a thin inline
+ * wrapper around it that skips the call for l == 0; otherwise an out-of-line implementation is declared. */
+#if HAVE_EXPLICIT_BZERO
+static inline void* explicit_bzero_safe(void *p, size_t l) {
+ if (l > 0)
+ explicit_bzero(p, l);
+
+ return p;
+}
+#else
+void *explicit_bzero_safe(void *p, size_t l);
+#endif
+
+char *string_erase(char *x);
+
+char *string_free_erase(char *s);
+DEFINE_TRIVIAL_CLEANUP_FUNC(char *, string_free_erase);
+#define _cleanup_string_free_erase_ _cleanup_(string_free_erasep)
+
+bool string_is_safe(const char *p) _pure_;
+
+/* NULL-tolerant strlen(): a NULL pointer has length 0. */
+static inline size_t strlen_ptr(const char *s) {
+        return s ? strlen(s) : 0;
+}
+
+/* Like startswith(), but for a memory block 'p' of 'sz' bytes that need not be NUL-terminated. Returns a
+ * pointer to the first byte behind the token on match, NULL otherwise. 'p' may be NULL only if the block is
+ * too short to hold the token anyway. */
+static inline void *memory_startswith(const void *p, size_t sz, const char *token) {
+        size_t token_len;
+
+        assert(token);
+
+        token_len = strlen(token);
+        if (token_len > sz)
+                return NULL;
+
+        assert(p);
+
+        return memcmp(p, token, token_len) == 0 ? (uint8_t*) p + token_len : NULL;
+}
+
+/* Like startswith_no_case(), but for a memory block 'p' of 'sz' bytes that need not be NUL-terminated. Case is
+ * folded with ascii_tolower(), hence this works for ASCII only. Returns a pointer to the first byte behind the
+ * token on match, NULL otherwise. */
+static inline void *memory_startswith_no_case(const void *p, size_t sz, const char *token) {
+        const char *bytes;
+        size_t token_len, i;
+
+        assert(token);
+
+        token_len = strlen(token);
+        if (token_len > sz)
+                return NULL;
+
+        assert(p);
+
+        bytes = p;
+        for (i = 0; i < token_len; i++)
+                if (ascii_tolower(bytes[i]) != ascii_tolower(token[i]))
+                        return NULL;
+
+        return (uint8_t*) p + token_len;
+}
diff --git a/src/basic/strv.c b/src/basic/strv.c
new file mode 100644
index 0000000..3a62f25
--- /dev/null
+++ b/src/basic/strv.c
@@ -0,0 +1,889 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fnmatch.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "fileio.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+/* Returns the first entry of l that is equal to name, or NULL if there is none (or l is NULL).
+ * The returned pointer is the list's own entry, not a copy. */
+char *strv_find(char **l, const char *name) {
+        char **entry;
+
+        assert(name);
+
+        if (!l)
+                return NULL;
+
+        for (entry = l; *entry; entry++)
+                if (streq(*entry, name))
+                        return *entry;
+
+        return NULL;
+}
+
+/* Returns the first entry of l that starts with name (the whole entry, not just the remainder),
+ * or NULL if no entry matches. */
+char *strv_find_prefix(char **l, const char *name) {
+        char **entry;
+
+        assert(name);
+
+        if (!l)
+                return NULL;
+
+        for (entry = l; *entry; entry++)
+                if (startswith(*entry, name))
+                        return *entry;
+
+        return NULL;
+}
+
+/* Like strv_find_prefix(), but returns whatever startswith() yields for the first matching entry,
+ * i.e. only the suffix following the prefix rather than the whole item. */
+char *strv_find_startswith(char **l, const char *name) {
+        char **i;
+
+        assert(name);
+
+        STRV_FOREACH(i, l) {
+                char *suffix;
+
+                suffix = startswith(*i, name);
+                if (suffix)
+                        return suffix;
+        }
+
+        return NULL;
+}
+
+/* Frees every string in l and turns l into an empty list; the array itself is kept.
+ * A NULL list is a no-op. */
+void strv_clear(char **l) {
+        char **item;
+
+        STRV_FOREACH(item, l)
+                free(*item);
+
+        if (l)
+                *l = NULL;
+}
+
+/* Frees all strings of l (via strv_clear()) and then the array itself. Returns whatever mfree()
+ * returns (presumably NULL, so that callers can write "l = strv_free(l)" -- confirm against
+ * alloc-util.h). */
+char **strv_free(char **l) {
+ strv_clear(l);
+ return mfree(l);
+}
+
+/* Like strv_free(), but passes every string through string_erase() before releasing it. */
+char **strv_free_erase(char **l) {
+        char **i;
+
+        if (l)
+                for (i = l; *i; i++)
+                        string_erase(*i);
+
+        return strv_free(l);
+}
+
+/* Returns a newly allocated deep copy of l. A NULL list is copied as an empty list.
+ * Returns NULL on allocation failure; nothing is leaked in that case. */
+char **strv_copy(char * const *l) {
+        size_t n, idx;
+        char **copy;
+
+        n = strv_length(l);
+
+        copy = new(char*, n + 1);
+        if (!copy)
+                return NULL;
+
+        for (idx = 0; idx < n; idx++) {
+                copy[idx] = strdup(l[idx]);
+                if (!copy[idx]) {
+                        /* The NULL just stored terminates the partial copy, so strv_free() releases
+                         * exactly the entries duplicated so far. */
+                        strv_free(copy);
+                        return NULL;
+                }
+        }
+
+        copy[n] = NULL;
+        return copy;
+}
+
+/* Counts the entries of l up to (not including) the terminating NULL. A NULL list has length 0. */
+size_t strv_length(char * const *l) {
+        char * const *end;
+
+        if (!l)
+                return 0;
+
+        end = l;
+        while (*end)
+                end++;
+
+        return (size_t) (end - l);
+}
+
+/* Builds a new string list from x followed by the NULL-terminated va_list ap; every string is
+ * duplicated. If x is NULL the result is an empty list and ap is not touched. Returns NULL on
+ * allocation failure. */
+char **strv_new_ap(const char *x, va_list ap) {
+        _cleanup_strv_free_ char **a = NULL;
+        size_t n = 0, i = 0;
+        const char *s;
+
+        /* As a special trick we ignore all listed strings that equal
+         * STRV_IGNORE. This is supposed to be used with the
+         * STRV_IFNOTNULL() macro to include possibly NULL strings in
+         * the string list. */
+
+        if (x) {
+                va_list aq;
+
+                if (x != STRV_IGNORE)
+                        n++;
+
+                /* First pass, on a copy of the argument list: count the strings we will keep */
+                va_copy(aq, ap);
+                for (;;) {
+                        s = va_arg(aq, const char*);
+                        if (!s)
+                                break;
+
+                        if (s != STRV_IGNORE)
+                                n++;
+                }
+                va_end(aq);
+        }
+
+        a = new(char*, n+1);
+        if (!a)
+                return NULL;
+
+        /* Second pass: x first, then the variadic strings. If x is NULL the loop body never runs
+         * and va_arg() is never invoked. */
+        for (s = x; s; s = va_arg(ap, const char*)) {
+                if (s == STRV_IGNORE)
+                        continue;
+
+                /* On failure a[i] is NULL, which terminates the list for the cleanup handler */
+                a[i] = strdup(s);
+                if (!a[i])
+                        return NULL;
+
+                i++;
+        }
+
+        a[i] = NULL;
+
+        return TAKE_PTR(a);
+}
+
+/* Variadic front-end of strv_new_ap(); the argument list must be terminated by NULL (the
+ * strv_new() macro appends one automatically). */
+char **strv_new_internal(const char *x, ...) {
+        va_list args;
+        char **result;
+
+        va_start(args, x);
+        result = strv_new_ap(x, args);
+        va_end(args);
+
+        return result;
+}
+
+/* Appends copies of all strings of b to *a. If filter_duplicates is true, strings that are already
+ * contained in *a (including ones appended earlier during this call) are skipped.
+ *
+ * Returns the number of strings appended, or -ENOMEM. On failure all strings appended by this call
+ * are freed again and *a keeps its previous contents (though the array may have been reallocated). */
+int strv_extend_strv(char ***a, char **b, bool filter_duplicates) {
+        char **s, **t;
+        size_t p, q, i = 0, j;
+
+        assert(a);
+
+        if (strv_isempty(b))
+                return 0;
+
+        p = strv_length(*a);
+        q = strv_length(b);
+
+        /* Check that p + q + 1 does not wrap around: reallocarray() only guards the multiplication,
+         * not this addition. */
+        if (q >= (size_t) -1 - p)
+                return -ENOMEM;
+
+        t = reallocarray(*a, p + q + 1, sizeof(char *));
+        if (!t)
+                return -ENOMEM;
+
+        /* Keep the list properly terminated at all times, so that strv_contains() below and the
+         * caller (on failure) always see a valid strv. */
+        t[p] = NULL;
+        *a = t;
+
+        STRV_FOREACH(s, b) {
+
+                if (filter_duplicates && strv_contains(t, *s))
+                        continue;
+
+                t[p+i] = strdup(*s);
+                if (!t[p+i])
+                        goto rollback;
+
+                i++;
+                t[p+i] = NULL;
+        }
+
+        assert(i <= q);
+
+        return (int) i;
+
+rollback:
+        for (j = 0; j < i; j++)
+                free(t[p + j]);
+
+        t[p] = NULL;
+        return -ENOMEM;
+}
+
+/* For every string in b, appends "<string><suffix>" (built with strappend()) to *a.
+ * Returns 0 on success or a negative errno-style value; entries appended before a failure
+ * remain in *a. */
+int strv_extend_strv_concat(char ***a, char **b, const char *suffix) {
+        char **s;
+
+        STRV_FOREACH(s, b) {
+                char *joined;
+                int r;
+
+                joined = strappend(*s, suffix);
+                if (!joined)
+                        return -ENOMEM;
+
+                /* strv_consume() frees the string itself if it cannot be pushed */
+                r = strv_consume(a, joined);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Splits s into a newly allocated string list, using _FOREACH_WORD() with the given separator
+ * characters (WHITESPACE if separator is NULL) and flags. Leading separators are skipped; if
+ * nothing remains an empty list is returned. Returns NULL on allocation failure. */
+char **strv_split_full(const char *s, const char *separator, SplitFlags flags) {
+        const char *word, *state;
+        size_t l, count = 0, filled = 0;
+        char **result;
+
+        assert(s);
+
+        if (!separator)
+                separator = WHITESPACE;
+
+        s += strspn(s, separator);
+        if (isempty(s))
+                return new0(char*, 1);
+
+        /* First pass: only count the words so the array can be sized exactly */
+        _FOREACH_WORD(word, l, s, separator, flags, state)
+                count++;
+
+        result = new(char*, count+1);
+        if (!result)
+                return NULL;
+
+        /* Second pass: duplicate each word */
+        _FOREACH_WORD(word, l, s, separator, flags, state) {
+                result[filled] = strndup(word, l);
+                if (!result[filled]) {
+                        strv_free(result);
+                        return NULL;
+                }
+
+                filled++;
+        }
+
+        result[filled] = NULL;
+        return result;
+}
+
+/* Special version of strv_split() that splits on the NEWLINE characters and suppresses an empty
+ * string at the end of the result. Returns NULL on allocation failure. */
+char **strv_split_newlines(const char *s) {
+        char **l;
+        size_t n;
+
+        assert(s);
+
+        l = strv_split(s, NEWLINE);
+        if (!l)
+                return NULL;
+
+        n = strv_length(l);
+        if (n > 0 && isempty(l[n - 1]))
+                l[n - 1] = mfree(l[n - 1]);
+
+        return l;
+}
+
+/* Splits s by calling extract_first_word() repeatedly with the given separators and flags, and
+ * stores the resulting list in *t (an empty list if no word was found). Returns the number of
+ * words on success, a negative errno-style value on failure; *t is only written on success. */
+int strv_split_extract(char ***t, const char *s, const char *separators, ExtractFlags flags) {
+        _cleanup_strv_free_ char **list = NULL;
+        size_t count = 0, allocated = 0;
+
+        assert(t);
+        assert(s);
+
+        for (;;) {
+                _cleanup_free_ char *word = NULL;
+                int r;
+
+                r = extract_first_word(&s, &word, separators, flags);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                /* Room for the new word plus the terminating NULL */
+                if (!GREEDY_REALLOC(list, allocated, count + 2))
+                        return -ENOMEM;
+
+                list[count] = TAKE_PTR(word);
+                count++;
+                list[count] = NULL;
+        }
+
+        if (!list) {
+                list = new0(char*, 1);
+                if (!list)
+                        return -ENOMEM;
+        }
+
+        *t = TAKE_PTR(list);
+
+        return (int) count;
+}
+
+/* Joins all strings of l into one newly allocated string. Each item is preceded by prefix (if not
+ * NULL), and items are separated by separator (a single space if NULL). An empty or NULL list
+ * yields an empty string. Returns NULL on allocation failure. */
+char *strv_join_prefix(char **l, const char *separator, const char *prefix) {
+        size_t sep_len, prefix_len, total = 0;
+        char **item, *joined, *w;
+
+        if (!separator)
+                separator = " ";
+
+        sep_len = strlen(separator);
+        prefix_len = strlen_ptr(prefix);
+
+        /* Work out the exact size first: no separator in front of the first item */
+        STRV_FOREACH(item, l) {
+                total += prefix_len + strlen(*item);
+                if (item != l)
+                        total += sep_len;
+        }
+
+        joined = new(char, total+1);
+        if (!joined)
+                return NULL;
+
+        w = joined;
+        STRV_FOREACH(item, l) {
+                if (item != l)
+                        w = stpcpy(w, separator);
+
+                if (prefix)
+                        w = stpcpy(w, prefix);
+
+                w = stpcpy(w, *item);
+        }
+
+        *w = 0;
+
+        return joined;
+}
+
+/* Appends value to *l without copying it: on success the list references the caller's string.
+ * A NULL value is a no-op. Returns 0 on success, -ENOMEM on failure (value is not freed then). */
+int strv_push(char ***l, char *value) {
+        size_t count, wanted;
+        char **grown;
+
+        if (!value)
+                return 0;
+
+        count = strv_length(*l);
+
+        /* One slot for the new entry, one for the terminating NULL; refuse on wrap-around */
+        wanted = count + 2;
+        if (wanted < count)
+                return -ENOMEM;
+
+        grown = reallocarray(*l, wanted, sizeof(char*));
+        if (!grown)
+                return -ENOMEM;
+
+        grown[count] = value;
+        grown[count + 1] = NULL;
+
+        *l = grown;
+        return 0;
+}
+
+/* Appends a and then b to *l without copying them; either may be NULL, in which case it is
+ * skipped. Returns 0 on success, -ENOMEM on failure (neither string is freed then). */
+int strv_push_pair(char ***l, char *a, char *b) {
+        size_t count, wanted, extra;
+        char **grown;
+
+        if (!a && !b)
+                return 0;
+
+        count = strv_length(*l);
+
+        /* Number of new entries plus the terminating NULL; refuse on wrap-around */
+        extra = (a ? 1 : 0) + (b ? 1 : 0);
+        wanted = count + extra + 1;
+        if (wanted < count)
+                return -ENOMEM;
+
+        grown = reallocarray(*l, wanted, sizeof(char*));
+        if (!grown)
+                return -ENOMEM;
+
+        if (a) {
+                grown[count] = a;
+                count++;
+        }
+        if (b) {
+                grown[count] = b;
+                count++;
+        }
+        grown[count] = NULL;
+
+        *l = grown;
+        return 0;
+}
+
+/* Inserts value (not copied) into *l so that it ends up at index position; positions beyond the
+ * end of the list append. A new array is allocated and the old one freed. A NULL value is a
+ * no-op. Returns 0 on success, -ENOMEM on failure. */
+int strv_insert(char ***l, size_t position, char *value) {
+        size_t count, wanted, i;
+        char **fresh;
+
+        if (!value)
+                return 0;
+
+        count = strv_length(*l);
+        position = MIN(position, count);
+
+        /* increase and check for overflow */
+        wanted = count + 2;
+        if (wanted < count)
+                return -ENOMEM;
+
+        fresh = new(char*, wanted);
+        if (!fresh)
+                return -ENOMEM;
+
+        /* Entries in front of the insertion point keep their index, all others shift up by one */
+        for (i = 0; i < count; i++)
+                fresh[i < position ? i : i + 1] = (*l)[i];
+
+        fresh[position] = value;
+        fresh[count + 1] = NULL;
+
+        free(*l);
+        *l = fresh;
+
+        return 0;
+}
+
+/* Like strv_push(), but takes ownership of value in all cases: if it cannot be appended it is
+ * freed. Returns strv_push()'s result. */
+int strv_consume(char ***l, char *value) {
+        int r = strv_push(l, value);
+
+        if (r >= 0)
+                return r;
+
+        free(value);
+        return r;
+}
+
+/* Like strv_push_pair(), but takes ownership of a and b in all cases: if they cannot be appended
+ * both are freed. Returns strv_push_pair()'s result. */
+int strv_consume_pair(char ***l, char *a, char *b) {
+        int r = strv_push_pair(l, a, b);
+
+        if (r >= 0)
+                return r;
+
+        free(a);
+        free(b);
+        return r;
+}
+
+/* Like strv_push_prepend(), but takes ownership of value in all cases: if it cannot be inserted
+ * it is freed. Returns strv_push_prepend()'s result. */
+int strv_consume_prepend(char ***l, char *value) {
+        int r = strv_push_prepend(l, value);
+
+        if (r >= 0)
+                return r;
+
+        free(value);
+        return r;
+}
+
+/* Appends a copy of value to *l. A NULL value is a no-op. Returns 0 on success, -ENOMEM if the
+ * copy or the list extension fails. */
+int strv_extend(char ***l, const char *value) {
+        char *copy;
+
+        if (!value)
+                return 0;
+
+        copy = strdup(value);
+
+        return copy ? strv_consume(l, copy) : -ENOMEM;
+}
+
+/* Like strv_extend(), but prepends rather than appends the copy of value. A NULL value is a
+ * no-op. Returns 0 on success, -ENOMEM on failure (the list is left unchanged then). */
+int strv_extend_front(char ***l, const char *value) {
+        size_t count, wanted;
+        char *copy, **grown;
+
+        assert(l);
+
+        if (!value)
+                return 0;
+
+        count = strv_length(*l);
+
+        /* Increase and overflow check. */
+        wanted = count + 2;
+        if (wanted < count)
+                return -ENOMEM;
+
+        copy = strdup(value);
+        if (!copy)
+                return -ENOMEM;
+
+        grown = reallocarray(*l, wanted, sizeof(char*));
+        if (!grown) {
+                free(copy);
+                return -ENOMEM;
+        }
+
+        /* Shift the existing entries up by one slot to make room at the front */
+        memmove(grown + 1, grown, count * sizeof(char*));
+        grown[0] = copy;
+        grown[count + 1] = NULL;
+
+        *l = grown;
+        return 0;
+}
+
+/* Drops duplicate entries in place. The first of several identical strings is kept, all later
+ * ones are removed (and freed) by strv_remove(). Returns l. */
+char **strv_uniq(char **l) {
+        char **i;
+
+        if (!l)
+                return l;
+
+        for (i = l; *i; i++)
+                strv_remove(i + 1, *i);
+
+        return l;
+}
+
+/* Returns true if no string occurs more than once in l (also for an empty or NULL list). */
+bool strv_is_uniq(char **l) {
+        char **i;
+
+        if (!l)
+                return true;
+
+        /* Only the tail after each entry needs to be searched for a second occurrence */
+        for (i = l; *i; i++)
+                if (strv_find(i + 1, *i))
+                        return false;
+
+        return true;
+}
+
+/* Drops (and frees) every occurrence of s in the string list, editing it in place: the remaining
+ * entries are compacted towards the front. Returns l, or NULL if l is NULL. */
+char **strv_remove(char **l, const char *s) {
+        char **src, **dst;
+
+        if (!l)
+                return NULL;
+
+        assert(s);
+
+        dst = l;
+        for (src = l; *src; src++) {
+                if (streq(*src, s)) {
+                        free(*src);
+                        continue;
+                }
+
+                *dst = *src;
+                dst++;
+        }
+
+        *dst = NULL;
+        return l;
+}
+
+char **strv_parse_nulstr(const char *s, size_t l) {
+        /* l is the length of the input data, which is split at NUL bytes into the elements of the
+         * resulting strv. Hence the number of items in the result equals one plus the number of NUL
+         * bytes among the l bytes starting at s -- unless s[l-1] is NUL, in which case the final
+         * empty string is not stored and the number of items equals the number of NUL bytes.
+         *
+         * Note that contrary to a normal nulstr, which cannot contain empty strings because the
+         * data is terminated by any two consecutive NUL bytes, this parser accepts empty strings
+         * in s.
+         *
+         * Returns a newly allocated list (empty if l is 0), or NULL on allocation failure.
+         */
+
+        size_t count = 0, filled = 0, k;
+        const char *end, *p;
+        char **v;
+
+        assert(s || l <= 0);
+
+        if (l <= 0)
+                return new0(char*, 1);
+
+        end = s + l;
+
+        for (k = 0; k < l; k++)
+                if (s[k] == 0)
+                        count++;
+
+        /* A trailing item that is not NUL-terminated counts as well */
+        if (end[-1] != 0)
+                count++;
+
+        v = new0(char*, count+1);
+        if (!v)
+                return NULL;
+
+        for (p = s; p < end;) {
+                const char *nul;
+                size_t len;
+
+                nul = memchr(p, 0, end - p);
+                len = nul ? (size_t) (nul - p) : (size_t) (end - p);
+
+                v[filled] = strndup(p, len);
+                if (!v[filled]) {
+                        strv_free(v);
+                        return NULL;
+                }
+
+                filled++;
+
+                if (!nul)
+                        break;
+
+                p = nul + 1;
+        }
+
+        assert(filled == count);
+
+        return v;
+}
+
+/* Converts the nulstr s (iterated with NULSTR_FOREACH()) into a newly allocated string list.
+ * If no item was added, an empty list from strv_new(NULL) is returned instead.
+ * Returns NULL on allocation failure. */
+char **strv_split_nulstr(const char *s) {
+        char **list = NULL;
+        const char *item;
+
+        NULSTR_FOREACH(item, s) {
+                if (strv_extend(&list, item) >= 0)
+                        continue;
+
+                strv_free(list);
+                return NULL;
+        }
+
+        return list ? list : strv_new(NULL);
+}
+
+int strv_make_nulstr(char **l, char **p, size_t *q) {
+        /* Serializes l into a newly allocated buffer, each string followed by its NUL byte. For a
+         * non-empty list a second NUL is added after the last string, so the output is a valid
+         * nulstr that can be iterated with NULSTR_FOREACH. The length returned in *q counts neither
+         * the final string's NUL nor that extra NUL, hence the buffer can also be parsed by
+         * strv_parse_nulstr as long as the length is passed along separately. For an empty list the
+         * buffer consists of a single NUL byte and *q is 0.
+         *
+         * Returns 0 on success, -ENOMEM on failure (*p and *q are untouched then).
+         */
+
+        size_t n_allocated = 0, used = 0;
+        _cleanup_free_ char *buf = NULL;
+        char **i;
+
+        assert(p);
+        assert(q);
+
+        STRV_FOREACH(i, l) {
+                size_t len;
+
+                len = strlen(*i);
+
+                /* Room for the string, its NUL, and the extra NUL appended after the loop */
+                if (!GREEDY_REALLOC(buf, n_allocated, used + len + 2))
+                        return -ENOMEM;
+
+                memcpy(buf + used, *i, len + 1);
+                used += len + 1;
+        }
+
+        if (buf)
+                /* make sure there is a second extra NUL at the end of resulting nulstr */
+                buf[used] = '\0';
+        else {
+                buf = new0(char, 1);
+                if (!buf)
+                        return -ENOMEM;
+                used = 1;
+        }
+
+        assert(used > 0);
+        *p = TAKE_PTR(buf);
+        *q = used - 1;
+
+        return 0;
+}
+
+/* Returns true if at least one string of a is also contained in b. */
+bool strv_overlap(char **a, char **b) {
+        char **i;
+
+        if (!a)
+                return false;
+
+        for (i = a; *i; i++)
+                if (strv_contains(b, *i))
+                        return true;
+
+        return false;
+}
+
+/* Comparison callback for sorting string list entries: orders them by strcmp(). */
+static int str_compare(char * const *a, char * const *b) {
+        const char *x = *a, *y = *b;
+
+        return strcmp(x, y);
+}
+
+/* Sorts l in place in strcmp() order and returns it. */
+char **strv_sort(char **l) {
+        size_t n = strv_length(l);
+
+        typesafe_qsort(l, n, str_compare);
+        return l;
+}
+
+/* Returns true if a and b contain the same strings in the same order. A NULL list and an empty
+ * list compare as equal. */
+bool strv_equal(char **a, char **b) {
+        bool a_empty, b_empty;
+
+        a_empty = strv_isempty(a);
+        b_empty = strv_isempty(b);
+
+        if (a_empty || b_empty)
+                return a_empty && b_empty;
+
+        /* If one list ends before the other, streq_ptr() compares NULL against a string, fails,
+         * and we return before stepping past either terminator. */
+        while (*a || *b) {
+                if (!streq_ptr(*a, *b))
+                        return false;
+
+                ++a;
+                ++b;
+        }
+
+        return true;
+}
+
+/* Writes every string of l to stdout, one per line (using puts()). */
+void strv_print(char **l) {
+        char **s;
+
+        if (!l)
+                return;
+
+        for (s = l; *s; s++)
+                puts(*s);
+}
+
+/* Formats a string printf()-style and appends it to *l. Returns -ENOMEM if vasprintf() fails,
+ * otherwise the result of strv_consume(). */
+int strv_extendf(char ***l, const char *format, ...) {
+        char *formatted;
+        va_list ap;
+        int k;
+
+        va_start(ap, format);
+        k = vasprintf(&formatted, format, ap);
+        va_end(ap);
+
+        return k < 0 ? -ENOMEM : strv_consume(l, formatted);
+}
+
+/* Reverses the order of the entries of l in place and returns l. */
+char **strv_reverse(char **l) {
+        size_t n, lo, hi;
+
+        n = strv_length(l);
+        if (n <= 1)
+                return l;
+
+        /* Swap from both ends towards the middle; for odd n the middle entry stays put */
+        for (lo = 0, hi = n - 1; lo < hi; lo++, hi--)
+                SWAP_TWO(l[lo], l[hi]);
+
+        return l;
+}
+
+/* Replaces every string in l by the result of shell_escape(string, bad), i.e. escapes every
+ * character that is listed in bad. Edits in place and does not roll back on error: if an
+ * allocation fails, NULL is returned and the entries processed so far stay escaped.
+ * Returns l on success. */
+char **strv_shell_escape(char **l, const char *bad) {
+        char **s;
+
+        if (!l)
+                return l;
+
+        for (s = l; *s; s++) {
+                char *escaped;
+
+                escaped = shell_escape(*s, bad);
+                if (!escaped)
+                        return NULL;
+
+                free(*s);
+                *s = escaped;
+        }
+
+        return l;
+}
+
+/* Returns true if s matches at least one of the fnmatch() patterns in the list, with the given
+ * fnmatch() flags. An empty or NULL pattern list never matches. */
+bool strv_fnmatch(char* const* patterns, const char *s, int flags) {
+        char* const* p;
+
+        if (!patterns)
+                return false;
+
+        for (p = patterns; *p; p++)
+                if (fnmatch(*p, s, flags) == 0)
+                        return true;
+
+        return false;
+}
+
+/* Frees a NULL-terminated array of string lists: every list via strv_free(), then the outer array.
+ * Returns NULL for a NULL array, otherwise the result of mfree(). */
+char ***strv_free_free(char ***l) {
+        char ***i;
+
+        if (!l)
+                return NULL;
+
+        i = l;
+        while (*i) {
+                strv_free(*i);
+                i++;
+        }
+
+        return mfree(l);
+}
+
+/* Returns a pointer into l that is advanced by n entries, stopping early once the (remaining)
+ * list is empty. Nothing is freed or copied. */
+char **strv_skip(char **l, size_t n) {
+
+        for (; n > 0; n--) {
+                if (strv_isempty(l))
+                        return l;
+
+                l++;
+        }
+
+        return l;
+}
+
+/* Appends n copies of value to *l. A NULL value or n == 0 is a no-op.
+ *
+ * Returns 0 on success, -ENOMEM on failure. On failure all copies added by this call are freed
+ * again and *l keeps its previous contents (though the array may have been reallocated). */
+int strv_extend_n(char ***l, const char *value, size_t n) {
+        size_t i, j, k;
+        char **nl;
+
+        assert(l);
+
+        if (!value)
+                return 0;
+        if (n == 0)
+                return 0;
+
+        k = strv_length(*l);
+
+        /* Check that k + n + 1 does not wrap around: reallocarray() only guards the multiplication,
+         * not this addition. */
+        if (n >= (size_t) -1 - k)
+                return -ENOMEM;
+
+        nl = reallocarray(*l, k + n + 1, sizeof(char *));
+        if (!nl)
+                return -ENOMEM;
+
+        *l = nl;
+
+        for (i = k; i < k + n; i++) {
+                nl[i] = strdup(value);
+                if (!nl[i])
+                        goto rollback;
+        }
+
+        nl[i] = NULL;
+        return 0;
+
+rollback:
+        for (j = k; j < i; j++)
+                free(nl[j]);
+
+        /* Re-terminate the list where it ended before this call */
+        nl[k] = NULL;
+        return -ENOMEM;
+}
+
+/* Like fputs(), but for strv, and with a less stupid argument order: writes every string of l to
+ * f through fputs_with_space(), passing separator and the space state along. If space is NULL a
+ * local flag (initially false) is used. Returns 0 on success, or the first negative result of
+ * fputs_with_space(). */
+int fputstrv(FILE *f, char **l, const char *separator, bool *space) {
+        bool local_space = false;
+        char **s;
+
+        if (!space)
+                space = &local_space;
+
+        STRV_FOREACH(s, l) {
+                int r;
+
+                r = fputs_with_space(f, *s, separator, space);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
diff --git a/src/basic/strv.h b/src/basic/strv.h
new file mode 100644
index 0000000..392cab6
--- /dev/null
+++ b/src/basic/strv.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <fnmatch.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "macro.h"
+#include "string-util.h"
+#include "util.h"
+
+/* Lookup helpers. All of them return a pointer into the list (no copy) or NULL if nothing matches:
+ * strv_find() looks for an entry equal to name, strv_find_prefix() for an entry starting with name,
+ * and strv_find_startswith() returns the part of the first matching entry that follows name. */
+char *strv_find(char **l, const char *name) _pure_;
+char *strv_find_prefix(char **l, const char *name) _pure_;
+char *strv_find_startswith(char **l, const char *name) _pure_;
+
+/* Frees all strings of the list and the list itself */
+char **strv_free(char **l);
+DEFINE_TRIVIAL_CLEANUP_FUNC(char**, strv_free);
+#define _cleanup_strv_free_ _cleanup_(strv_freep)
+
+/* Like strv_free(), but runs string_erase() on every string first */
+char **strv_free_erase(char **l);
+DEFINE_TRIVIAL_CLEANUP_FUNC(char**, strv_free_erase);
+#define _cleanup_strv_free_erase_ _cleanup_(strv_free_erasep)
+
+/* Frees all strings but keeps the array, leaving an empty list behind */
+void strv_clear(char **l);
+
+/* strv_copy() returns a deep copy (NULL on allocation failure); strv_length() counts the entries
+ * and treats a NULL list as empty */
+char **strv_copy(char * const *l);
+size_t strv_length(char * const *l) _pure_;
+
+/* Helpers that add entries to a list. The strv_extend*() functions add copies of their arguments,
+ * while strv_push(), strv_push_pair() and strv_insert() store the passed pointers as they are.
+ * All return a negative errno-style value on failure. */
+int strv_extend_strv(char ***a, char **b, bool filter_duplicates);
+int strv_extend_strv_concat(char ***a, char **b, const char *suffix);
+int strv_extend(char ***l, const char *value);
+/* The format string is argument 2 and the variadic arguments start at 3, so that the compiler can
+ * check them against the format */
+int strv_extendf(char ***l, const char *format, ...) _printf_(2,3);
+int strv_extend_front(char ***l, const char *value);
+int strv_push(char ***l, char *value);
+int strv_push_pair(char ***l, char *a, char *b);
+int strv_insert(char ***l, size_t position, char *value);
+
+/* Inserts value (not copied) at the front of the list; shorthand for strv_insert() at position 0 */
+static inline int strv_push_prepend(char ***l, char *value) {
+ return strv_insert(l, 0, value);
+}
+
+/* Like strv_push(), strv_push_pair() and strv_push_prepend(), but the passed strings are freed if
+ * they cannot be added to the list */
+int strv_consume(char ***l, char *value);
+int strv_consume_pair(char ***l, char *a, char *b);
+int strv_consume_prepend(char ***l, char *value);
+
+/* strv_remove() drops all occurrences of s, strv_uniq() drops all but the first of identical
+ * entries; both edit the list in place and return it */
+char **strv_remove(char **l, const char *s);
+char **strv_uniq(char **l);
+bool strv_is_uniq(char **l);
+
+/* Element-wise comparison; NULL and empty lists are considered equal */
+bool strv_equal(char **a, char **b);
+
+/* True if the list l contains a string equal to s */
+#define strv_contains(l, s) (!!strv_find((l), (s)))
+
+/* strv_new() builds a list from copies of its arguments; the terminating NULL sentinel is added
+ * by the macro */
+char **strv_new_internal(const char *x, ...) _sentinel_;
+char **strv_new_ap(const char *x, va_list ap);
+#define strv_new(...) strv_new_internal(__VA_ARGS__, NULL)
+
+/* Marker value: arguments equal to this pointer are skipped by strv_new_ap() */
+#define STRV_IGNORE ((const char *) -1)
+
+/* Maps NULL to STRV_IGNORE and passes every other pointer through unchanged, so that a possibly
+ * NULL string can be listed in strv_new() without terminating the argument list early. */
+static inline const char* STRV_IFNOTNULL(const char *x) {
+        if (x)
+                return x;
+
+        return STRV_IGNORE;
+}
+
+/* True if the list is NULL or has no entries. */
+static inline bool strv_isempty(char * const *l) {
+        return !(l && *l);
+}
+
+/* Split s at the separator characters (whitespace if separator is NULL) into a new list */
+char **strv_split_full(const char *s, const char *separator, SplitFlags flags);
+/* strv_split_full() without any flags */
+static inline char **strv_split(const char *s, const char *separator) {
+ return strv_split_full(s, separator, 0);
+}
+/* Splits on newline characters and suppresses an empty string at the end */
+char **strv_split_newlines(const char *s);
+
+/* Splits with extract_first_word(); returns the number of words or a negative errno-style value */
+int strv_split_extract(char ***t, const char *s, const char *separators, ExtractFlags flags);
+
+/* Joins the list into one new string, items separated by separator (" " if NULL), each item
+ * preceded by prefix (if not NULL) */
+char *strv_join_prefix(char **l, const char *separator, const char *prefix);
+/* strv_join_prefix() without a prefix */
+static inline char *strv_join(char **l, const char *separator) {
+ return strv_join_prefix(l, separator, NULL);
+}
+
+/* Conversion between string lists and NUL-separated buffers */
+char **strv_parse_nulstr(const char *s, size_t l);
+char **strv_split_nulstr(const char *s);
+int strv_make_nulstr(char **l, char **p, size_t *n);
+
+/* True if a and b have at least one string in common */
+bool strv_overlap(char **a, char **b) _pure_;
+
+/* Iterates s (a char**) over all entries of the list l, front to back. A NULL list results in zero
+ * iterations. */
+#define STRV_FOREACH(s, l) \
+ for ((s) = (l); (s) && *(s); (s)++)
+
+/* Iterates s over all entries of l, back to front, starting at the last entry. l is evaluated more
+ * than once, so it should be free of side effects.
+ * NOTE(review): for an empty non-NULL list the start value is l - 1, and the final decrement also
+ * steps to l - 1; the loop relies on "s >= l" being false for that pointer. */
+#define STRV_FOREACH_BACKWARDS(s, l) \
+ for (s = ({ \
+ char **_l = l; \
+ _l ? _l + strv_length(_l) - 1U : NULL; \
+ }); \
+ (l) && ((s) >= (l)); \
+ (s)--)
+
+/* Iterates over l two entries at a time: x points to the first and y to the second entry of each
+ * pair. The loop ends as soon as either of the two is NULL, so a trailing unpaired entry is not
+ * visited. x appears unparenthesized in "x+1", hence pass a plain variable. */
+#define STRV_FOREACH_PAIR(x, y, l) \
+ for ((x) = (l), (y) = (x+1); (x) && *(x) && *(y); (x) += 2, (y) = (x + 1))
+
+/* strv_sort() sorts in place in strcmp() order and returns l; strv_print() writes one string per
+ * line to stdout */
+char **strv_sort(char **l);
+void strv_print(char **l);
+
+/* Builds a NULL-terminated list from the given strings as a compound literal: nothing is allocated
+ * or copied, and the result must not be freed. Note that constness of the strings is cast away. */
+#define STRV_MAKE(...) ((char**) ((const char*[]) { __VA_ARGS__, NULL }))
+
+/* An empty list as a compound literal */
+#define STRV_MAKE_EMPTY ((char*[1]) { NULL })
+
+/* To be used inside a variadic function whose last named parameter is first: collects first and
+ * the following NULL-terminated variadic arguments into a NULL-terminated array and evaluates to
+ * it. The strings are not copied. If first is NULL, the address of first itself is used as the
+ * (empty) list. Otherwise the arguments are walked twice: once to count them, once to store them
+ * in an array obtained from newa() (presumably stack-allocated, as the macro name suggests --
+ * confirm against alloc-util.h). */
+#define strv_from_stdarg_alloca(first) \
+ ({ \
+ char **_l; \
+ \
+ if (!first) \
+ _l = (char**) &first; \
+ else { \
+ size_t _n; \
+ va_list _ap; \
+ \
+ _n = 1; \
+ va_start(_ap, first); \
+ while (va_arg(_ap, char*)) \
+ _n++; \
+ va_end(_ap); \
+ \
+ _l = newa(char*, _n+1); \
+ _l[_n = 0] = (char*) first; \
+ va_start(_ap, first); \
+ for (;;) { \
+ _l[++_n] = va_arg(_ap, char*); \
+ if (!_l[_n]) \
+ break; \
+ } \
+ va_end(_ap); \
+ } \
+ _l; \
+ })
+
+/* True if the string x equals one of the listed strings. x must not be NULL, since it ends up as
+ * the name argument of strv_find(), which asserts on it. */
+#define STR_IN_SET(x, ...) strv_contains(STRV_MAKE(__VA_ARGS__), x)
+/* Like STR_IN_SET(), but x may be NULL, in which case the result is false. x is evaluated once. */
+#define STRPTR_IN_SET(x, ...) \
+ ({ \
+ const char* _x = (x); \
+ _x && strv_contains(STRV_MAKE(__VA_ARGS__), _x); \
+ })
+
+/* Tries the listed prefixes in order and evaluates to the result of startswith() for the first one
+ * that p starts with, or to NULL if none matches. */
+#define STARTSWITH_SET(p, ...) \
+ ({ \
+ const char *_p = (p); \
+ char *_found = NULL, **_i; \
+ STRV_FOREACH(_i, STRV_MAKE(__VA_ARGS__)) { \
+ _found = startswith(_p, *_i); \
+ if (_found) \
+ break; \
+ } \
+ _found; \
+ })
+
+/* Loop header that assigns each of the listed strings (y and the following arguments) to x in
+ * turn. The loop ends at the first NULL value, i.e. at the terminator of the list or at a NULL
+ * argument. */
+#define FOREACH_STRING(x, y, ...) \
+ for (char **_l = STRV_MAKE(({ x = y; }), ##__VA_ARGS__); \
+ x; \
+ x = *(++_l))
+
+/* strv_reverse() reverses the list in place. strv_shell_escape() replaces every string by its
+ * shell_escape()d version in place and returns NULL on allocation failure without rolling back. */
+char **strv_reverse(char **l);
+char **strv_shell_escape(char **l, const char *bad);
+
+/* True if s matches at least one of the fnmatch() patterns */
+bool strv_fnmatch(char* const* patterns, const char *s, int flags);
+
+/* Like strv_fnmatch(), but an empty or NULL pattern list matches everything. */
+static inline bool strv_fnmatch_or_empty(char* const* patterns, const char *s, int flags) {
+        assert(s);
+
+        if (strv_isempty(patterns))
+                return true;
+
+        return strv_fnmatch(patterns, s, flags);
+}
+
+/* Frees a NULL-terminated array of string lists, including all lists and their strings */
+char ***strv_free_free(char ***l);
+DEFINE_TRIVIAL_CLEANUP_FUNC(char***, strv_free_free);
+
+/* Returns l advanced by up to n entries, stopping at the end of the list */
+char **strv_skip(char **l, size_t n);
+
+/* Appends n copies of value to the list */
+int strv_extend_n(char ***l, const char *value, size_t n);
+
+/* Writes all strings of the list to f via fputs_with_space(); space may be NULL */
+int fputstrv(FILE *f, char **l, const char *separator, bool *space);
+
+/* Frees the list a, moves the list b into a and resets b to NULL. Evaluates to 0. Both arguments
+ * are evaluated twice and assigned to, hence they must be side-effect free lvalues. */
+#define strv_free_and_replace(a, b) \
+ ({ \
+ strv_free(a); \
+ (a) = (b); \
+ (b) = NULL; \
+ 0; \
+ })
diff --git a/src/basic/strxcpyx.c b/src/basic/strxcpyx.c
new file mode 100644
index 0000000..9210277
--- /dev/null
+++ b/src/basic/strxcpyx.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+/*
+ * Concatenates/copies strings. The result is always terminated with
+ * '\0', and the @dest pointer is moved forward to the added '\0'.
+ * Returns the remaining size, and 0 if the string was truncated.
+ *
+ * Due to the intended usage, these helpers silently noop invocations
+ * having zero size. This is technically an exception to the above
+ * statement that the result is always terminated. It's unexpected for
+ * such calls to occur outside of a loop where this is the preferred behavior.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "strxcpyx.h"
+
+/* Copies src to *dest, writing at most size bytes including the terminating NUL, and advances
+ * *dest to the written NUL. Returns the space left in the buffer, or 0 if src had to be truncated
+ * (or exactly filled the remaining space). With size == 0 nothing is written at all. */
+size_t strpcpy(char **dest, size_t size, const char *src) {
+        size_t len, ncopy;
+
+        assert(dest);
+        assert(src);
+
+        if (size == 0)
+                return 0;
+
+        len = strlen(src);
+        if (len < size) {
+                ncopy = len;
+                size -= len;
+        } else {
+                /* Does not fit: keep one byte for the terminating NUL */
+                ncopy = size - 1;
+                size = 0;
+        }
+
+        if (ncopy > 0)
+                *dest = mempcpy(*dest, src, ncopy);
+
+        **dest = '\0';
+        return size;
+}
+
+/* Formats a string printf()-style into *dest, writing at most size bytes including the terminating
+ * NUL. If the output fits, *dest is advanced to the written NUL and the remaining space is
+ * returned. If the output was truncated, *dest is left where it was and 0 is returned. If
+ * vsnprintf() reports an error, the buffer is terminated at *dest and 0 is returned as well.
+ * With size == 0 nothing is written at all. */
+size_t strpcpyf(char **dest, size_t size, const char *src, ...) {
+        va_list va;
+        int i;
+
+        assert(dest);
+        assert(src);
+
+        if (size == 0)
+                return 0;
+
+        va_start(va, src);
+        i = vsnprintf(*dest, size, src, va);
+        va_end(va);
+
+        if (i < 0) {
+                /* Output error: do not use the negative value for pointer or size arithmetic. Make
+                 * sure the buffer is terminated and report no space left. */
+                **dest = '\0';
+                return 0;
+        }
+
+        /* Compare as size_t, so that buffers larger than INT_MAX are handled correctly */
+        if ((size_t) i >= size)
+                return 0;
+
+        *dest += i;
+        return size - (size_t) i;
+}
+
+/* Appends src and all following strings of the NULL-terminated argument list to *dest by calling
+ * strpcpy() for each of them. Returns the space left after the last string, 0 on truncation. */
+size_t strpcpyl(char **dest, size_t size, const char *src, ...) {
+        va_list va;
+
+        assert(dest);
+        assert(src);
+
+        va_start(va, src);
+        for (;;) {
+                size = strpcpy(dest, size, src);
+
+                src = va_arg(va, char *);
+                if (!src)
+                        break;
+        }
+        va_end(va);
+
+        return size;
+}
+
+/* Like strpcpy(), but takes the destination buffer by value: the caller's pointer is not moved.
+ * Returns the space left in the buffer, 0 on truncation. */
+size_t strscpy(char *dest, size_t size, const char *src) {
+        char *cursor = dest;
+
+        assert(dest);
+        assert(src);
+
+        return strpcpy(&cursor, size, src);
+}
+
+/* Like strpcpyl(), but takes the destination buffer by value: src and all following strings of
+ * the NULL-terminated argument list are concatenated into dest. Returns the space left in the
+ * buffer, 0 on truncation. */
+size_t strscpyl(char *dest, size_t size, const char *src, ...) {
+        char *cursor = dest;
+        va_list va;
+
+        assert(dest);
+        assert(src);
+
+        va_start(va, src);
+        for (;;) {
+                size = strpcpy(&cursor, size, src);
+
+                src = va_arg(va, char *);
+                if (!src)
+                        break;
+        }
+        va_end(va);
+
+        return size;
+}
diff --git a/src/basic/strxcpyx.h b/src/basic/strxcpyx.h
new file mode 100644
index 0000000..0f2b749
--- /dev/null
+++ b/src/basic/strxcpyx.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stddef.h>
+
+#include "macro.h"
+
+/* All of these write at most size bytes (including the terminating NUL) and return the space that
+ * is left in the buffer, or 0 if the output was truncated. The strp*() variants advance *dest as
+ * they write so that calls can be chained; the strs*() variants take the buffer by value. The
+ * *l() variants take a NULL-terminated list of strings. */
+size_t strpcpy(char **dest, size_t size, const char *src);
+size_t strpcpyf(char **dest, size_t size, const char *src, ...) _printf_(3, 4);
+size_t strpcpyl(char **dest, size_t size, const char *src, ...) _sentinel_;
+size_t strscpy(char *dest, size_t size, const char *src);
+size_t strscpyl(char *dest, size_t size, const char *src, ...) _sentinel_;
diff --git a/src/basic/syslog-util.c b/src/basic/syslog-util.c
new file mode 100644
index 0000000..fe12948
--- /dev/null
+++ b/src/basic/syslog-util.c
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+#include <syslog.h>
+
+#include "hexdecoct.h"
+#include "macro.h"
+#include "string-table.h"
+#include "syslog-util.h"
+
+/* Parses a syslog priority prefix of the form "<N>", with N consisting of one to three decimal
+ * digits, at the beginning of *p.
+ *
+ * If with_facility is true, *priority is set to the parsed number. Otherwise only the values 0..7
+ * are accepted (leading zeroes are fine) and merely the level bits of *priority are replaced,
+ * while its LOG_FACMASK bits are kept.
+ *
+ * Returns 1 and advances *p past the '>' on success; returns 0 and leaves *p and *priority
+ * untouched if there is no valid prefix. */
+int syslog_parse_priority(const char **p, int *priority, bool with_facility) {
+        int a = 0, b = 0, c = 0;
+        const char *end;
+        size_t k;
+
+        assert(p);
+        assert(*p);
+        assert(priority);
+
+        if ((*p)[0] != '<')
+                return 0;
+
+        end = strchr(*p, '>');
+        if (!end)
+                return 0;
+
+        /* k is the offset of '>', i.e. one more than the number of characters between the brackets */
+        k = end - *p;
+        assert(k > 0);
+
+        switch (k) {
+
+        case 2:
+                c = undecchar((*p)[1]);
+                break;
+
+        case 3:
+                b = undecchar((*p)[1]);
+                c = undecchar((*p)[2]);
+                break;
+
+        case 4:
+                a = undecchar((*p)[1]);
+                b = undecchar((*p)[2]);
+                c = undecchar((*p)[3]);
+                break;
+
+        default:
+                return 0;
+        }
+
+        /* undecchar() yields a negative value for anything that is not a decimal digit */
+        if (a < 0 || b < 0 || c < 0)
+                return 0;
+
+        if (!with_facility) {
+                if (a || b || c > 7)
+                        return 0;
+
+                *priority = (*priority & LOG_FACMASK) | c;
+        } else
+                *priority = a*100 + b*10 + c;
+
+        *p += k + 1;
+        return 1;
+}
+
+/* Names of the syslog facilities, indexed by the "unshifted" facility number, i.e. by LOG_FAC() of
+ * the LOG_xxx facility constant. */
+static const char *const log_facility_unshifted_table[LOG_NFACILITIES] = {
+ [LOG_FAC(LOG_KERN)] = "kern",
+ [LOG_FAC(LOG_USER)] = "user",
+ [LOG_FAC(LOG_MAIL)] = "mail",
+ [LOG_FAC(LOG_DAEMON)] = "daemon",
+ [LOG_FAC(LOG_AUTH)] = "auth",
+ [LOG_FAC(LOG_SYSLOG)] = "syslog",
+ [LOG_FAC(LOG_LPR)] = "lpr",
+ [LOG_FAC(LOG_NEWS)] = "news",
+ [LOG_FAC(LOG_UUCP)] = "uucp",
+ [LOG_FAC(LOG_CRON)] = "cron",
+ [LOG_FAC(LOG_AUTHPRIV)] = "authpriv",
+ [LOG_FAC(LOG_FTP)] = "ftp",
+ [LOG_FAC(LOG_LOCAL0)] = "local0",
+ [LOG_FAC(LOG_LOCAL1)] = "local1",
+ [LOG_FAC(LOG_LOCAL2)] = "local2",
+ [LOG_FAC(LOG_LOCAL3)] = "local3",
+ [LOG_FAC(LOG_LOCAL4)] = "local4",
+ [LOG_FAC(LOG_LOCAL5)] = "local5",
+ [LOG_FAC(LOG_LOCAL6)] = "local6",
+ [LOG_FAC(LOG_LOCAL7)] = "local7"
+};
+
+/* Generates the string conversion functions for the table above (see the prototypes in
+ * syslog-util.h); LOG_FAC(~0) is passed as the largest facility number. */
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(log_facility_unshifted, int, LOG_FAC(~0));
+
+/* Returns true if facility is within the range of unshifted facility numbers, i.e. between 0 and
+ * LOG_FAC(~0) inclusive. */
+bool log_facility_unshifted_is_valid(int facility) {
+        if (facility < 0)
+                return false;
+
+        return facility <= LOG_FAC(~0);
+}
+
+/* Names of the syslog levels, indexed by the LOG_xxx level constant */
+static const char *const log_level_table[] = {
+ [LOG_EMERG] = "emerg",
+ [LOG_ALERT] = "alert",
+ [LOG_CRIT] = "crit",
+ [LOG_ERR] = "err",
+ [LOG_WARNING] = "warning",
+ [LOG_NOTICE] = "notice",
+ [LOG_INFO] = "info",
+ [LOG_DEBUG] = "debug"
+};
+
+/* Generates the string conversion functions for the table above (see the prototypes in
+ * syslog-util.h); LOG_DEBUG is passed as the largest level. */
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(log_level, int, LOG_DEBUG);
+
+/* Returns true if level is a valid syslog level, i.e. between 0 and LOG_DEBUG inclusive. */
+bool log_level_is_valid(int level) {
+        if (level < 0)
+                return false;
+
+        return level <= LOG_DEBUG;
+}
diff --git a/src/basic/syslog-util.h b/src/basic/syslog-util.h
new file mode 100644
index 0000000..8f419e8
--- /dev/null
+++ b/src/basic/syslog-util.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+/* Conversion between unshifted syslog facility numbers (LOG_FAC() of the LOG_xxx facility
+ * constants) and their names, plus a range check */
+int log_facility_unshifted_to_string_alloc(int i, char **s);
+int log_facility_unshifted_from_string(const char *s);
+bool log_facility_unshifted_is_valid(int facility);
+
+/* Conversion between syslog levels (LOG_EMERG ... LOG_DEBUG) and their names, plus a range check */
+int log_level_to_string_alloc(int i, char **s);
+int log_level_from_string(const char *s);
+bool log_level_is_valid(int level);
+
+/* Parses a "<N>" priority prefix at *p; returns 1 and advances *p if one was found, 0 otherwise */
+int syslog_parse_priority(const char **p, int *priority, bool with_facility);
diff --git a/src/basic/terminal-util.c b/src/basic/terminal-util.c
new file mode 100644
index 0000000..0f38120
--- /dev/null
+++ b/src/basic/terminal-util.c
@@ -0,0 +1,1318 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/kd.h>
+#include <linux/tiocl.h>
+#include <linux/vt.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/sysmacros.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "copy.h"
+#include "def.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "util.h"
+
+static volatile unsigned cached_columns = 0;
+static volatile unsigned cached_lines = 0;
+
+static volatile int cached_on_tty = -1;
+static volatile int cached_colors_enabled = -1;
+static volatile int cached_underline_enabled = -1;
+
+/* Switches to the specified VT number. If vt is <= 0, switches to the VT the kernel log messages
+ * go to, if that's configured, and to VT 1 otherwise.
+ *
+ * Returns 0 on success, a negative errno-style value on failure. */
+int chvt(int vt) {
+        _cleanup_close_ int fd = -1;
+
+        fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+        if (fd < 0)
+                /* open_terminal() already returns a negative errno-style value, and errno is not
+                 * necessarily set for all of its failure modes (e.g. -ENOTTY), hence pass it on
+                 * as is. */
+                return fd;
+
+        if (vt <= 0) {
+                int tiocl[2] = {
+                        TIOCL_GETKMSGREDIRECT,
+                        0
+                };
+
+                if (ioctl(fd, TIOCLINUX, tiocl) < 0)
+                        return -errno;
+
+                /* The result is read back from the first element; fall back to VT 1 if no
+                 * redirection is configured */
+                vt = tiocl[0] <= 0 ? 1 : tiocl[0];
+        }
+
+        if (ioctl(fd, VT_ACTIVATE, vt) < 0)
+                return -errno;
+
+        return 0;
+}
+
+/* Reads a single character from f and stores it in *ret.
+ *
+ * If f refers to a terminal (i.e. tcgetattr() succeeds), canonical mode is switched off temporarily
+ * so that one key press can be read without waiting for a newline; the previous terminal settings
+ * are restored afterwards. Otherwise a whole line is read, which must consist of exactly one
+ * character.
+ *
+ * t is the timeout passed to fd_wait_for_event(); USEC_INFINITY disables the wait. If need_nl is
+ * not NULL, it is set to true if a character other than '\n' was read in terminal mode (so the
+ * caller may want to print a newline), and to false in line mode.
+ *
+ * Returns 0 on success, -ETIMEDOUT if no input became available in time, -EIO if the read returned
+ * 0 (presumably end of file), -EBADMSG if the line read did not consist of exactly one character,
+ * or another negative errno-style value from the read helpers. */
+int read_one_char(FILE *f, char *ret, usec_t t, bool *need_nl) {
+ _cleanup_free_ char *line = NULL;
+ struct termios old_termios;
+ int r;
+
+ assert(f);
+ assert(ret);
+
+ /* If this is a terminal, then switch canonical mode off, so that we can read a single character */
+ if (tcgetattr(fileno(f), &old_termios) >= 0) {
+ struct termios new_termios = old_termios;
+
+ new_termios.c_lflag &= ~ICANON;
+ new_termios.c_cc[VMIN] = 1;
+ new_termios.c_cc[VTIME] = 0;
+
+ if (tcsetattr(fileno(f), TCSADRAIN, &new_termios) >= 0) {
+ char c;
+
+ if (t != USEC_INFINITY) {
+ if (fd_wait_for_event(fileno(f), POLLIN, t) <= 0) {
+ /* Restore the original settings before bailing out */
+ (void) tcsetattr(fileno(f), TCSADRAIN, &old_termios);
+ return -ETIMEDOUT;
+ }
+ }
+
+ r = safe_fgetc(f, &c);
+ /* Restore the original settings first, regardless of whether the read worked */
+ (void) tcsetattr(fileno(f), TCSADRAIN, &old_termios);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EIO;
+
+ if (need_nl)
+ *need_nl = c != '\n';
+
+ *ret = c;
+ return 0;
+ }
+ }
+
+ /* We get here if f is not a terminal, or if canonical mode could not be switched off */
+ if (t != USEC_INFINITY) {
+ if (fd_wait_for_event(fileno(f), POLLIN, t) <= 0)
+ return -ETIMEDOUT;
+ }
+
+ /* If this is not a terminal, then read a full line instead */
+
+ r = read_line(f, 16, &line); /* longer than necessary, to eat up UTF-8 chars/vt100 key sequences */
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EIO;
+
+ if (strlen(line) != 1)
+ return -EBADMSG;
+
+ if (need_nl)
+ *need_nl = false;
+
+ *ret = line[0];
+ return 0;
+}
+
+#define DEFAULT_ASK_REFRESH_USEC (2*USEC_PER_SEC)
+
+/* Prints the printf()-style prompt fmt (highlighted if colors are enabled) and reads a single
+ * character from stdin, repeating until one of the characters listed in replies was entered.
+ * The prompt is printed again every DEFAULT_ASK_REFRESH_USEC while waiting for input.
+ *
+ * Returns 0 and stores the character in *ret on success, or a negative errno-style value if
+ * reading failed for a reason other than a timeout or malformed input. */
+int ask_char(char *ret, const char *replies, const char *fmt, ...) {
+        assert(ret);
+        assert(replies);
+        assert(fmt);
+
+        for (;;) {
+                bool need_nl = true;
+                va_list ap;
+                char c;
+                int r;
+
+                if (colors_enabled())
+                        fputs(ANSI_HIGHLIGHT, stdout);
+
+                putchar('\r');
+
+                va_start(ap, fmt);
+                vprintf(fmt, ap);
+                va_end(ap);
+
+                if (colors_enabled())
+                        fputs(ANSI_NORMAL, stdout);
+
+                fflush(stdout);
+
+                r = read_one_char(stdin, &c, DEFAULT_ASK_REFRESH_USEC, &need_nl);
+
+                /* Nothing was typed yet: just show the prompt again */
+                if (r == -ETIMEDOUT)
+                        continue;
+
+                if (r == -EBADMSG) {
+                        puts("Bad input, please try again.");
+                        continue;
+                }
+
+                if (r < 0) {
+                        putchar('\n');
+                        return r;
+                }
+
+                if (need_nl)
+                        putchar('\n');
+
+                if (strchr(replies, c)) {
+                        *ret = c;
+                        return 0;
+                }
+
+                puts("Read unexpected character, please try again.");
+        }
+}
+
+/* Prints the printf()-style prompt text (highlighted if colors are enabled) and reads a line from
+ * stdin, repeating until a non-empty line was entered.
+ *
+ * Returns 0 and stores the newly allocated line in *ret on success, -EIO if read_line() returned
+ * 0, or the negative errno-style value read_line() failed with. */
+int ask_string(char **ret, const char *text, ...) {
+        assert(ret);
+        assert(text);
+
+        for (;;) {
+                _cleanup_free_ char *line = NULL;
+                va_list ap;
+                int r;
+
+                if (colors_enabled())
+                        fputs(ANSI_HIGHLIGHT, stdout);
+
+                va_start(ap, text);
+                vprintf(text, ap);
+                va_end(ap);
+
+                if (colors_enabled())
+                        fputs(ANSI_NORMAL, stdout);
+
+                fflush(stdout);
+
+                r = read_line(stdin, LONG_LINE_MAX, &line);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        return -EIO;
+
+                /* Empty input: ask again */
+                if (isempty(line))
+                        continue;
+
+                *ret = TAKE_PTR(line);
+                return 0;
+        }
+}
+
+/* Resets the terminal referred to by fd to sane defaults: exclusive mode is turned off, the console
+ * is optionally switched back to text mode (switch_to_text), the keyboard mode is reset, and the
+ * termios flags and control characters are reinitialized. Pending input and output is flushed in
+ * all cases, even if resetting failed.
+ *
+ * The results of the ioctl() calls, vt_reset_keyboard() and tcflush() are ignored on purpose.
+ * Returns 0 on success, or -errno if tcgetattr() or tcsetattr() failed. */
+int reset_terminal_fd(int fd, bool switch_to_text) {
+ struct termios termios;
+ int r = 0;
+
+ /* Set terminal to some sane defaults */
+
+ assert(fd >= 0);
+
+ /* We leave locked terminal attributes untouched, so that
+ * Plymouth may set whatever it wants to set, and we don't
+ * interfere with that. */
+
+ /* Disable exclusive mode, just in case */
+ (void) ioctl(fd, TIOCNXCL);
+
+ /* Switch to text mode */
+ if (switch_to_text)
+ (void) ioctl(fd, KDSETMODE, KD_TEXT);
+
+ /* Set default keyboard mode */
+ (void) vt_reset_keyboard(fd);
+
+ if (tcgetattr(fd, &termios) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ /* We only reset the stuff that matters to the software. How
+ * hardware is set up we don't touch assuming that somebody
+ * else will do that for us */
+
+ termios.c_iflag &= ~(IGNBRK | BRKINT | ISTRIP | INLCR | IGNCR | IUCLC);
+ termios.c_iflag |= ICRNL | IMAXBEL | IUTF8;
+ termios.c_oflag |= ONLCR;
+ termios.c_cflag |= CREAD;
+ termios.c_lflag = ISIG | ICANON | IEXTEN | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOPRT | ECHOKE;
+
+ termios.c_cc[VINTR] = 03; /* ^C */
+ termios.c_cc[VQUIT] = 034; /* ^\ */
+ termios.c_cc[VERASE] = 0177; /* DEL */
+ termios.c_cc[VKILL] = 025; /* ^U */
+ termios.c_cc[VEOF] = 04; /* ^D */
+ termios.c_cc[VSTART] = 021; /* ^Q */
+ termios.c_cc[VSTOP] = 023; /* ^S */
+ termios.c_cc[VSUSP] = 032; /* ^Z */
+ termios.c_cc[VLNEXT] = 026; /* ^V */
+ termios.c_cc[VWERASE] = 027; /* ^W */
+ termios.c_cc[VREPRINT] = 022; /* ^R */
+ termios.c_cc[VEOL] = 0;
+ termios.c_cc[VEOL2] = 0;
+
+ termios.c_cc[VTIME] = 0;
+ termios.c_cc[VMIN] = 1;
+
+ if (tcsetattr(fd, TCSANOW, &termios) < 0)
+ r = -errno;
+
+finish:
+ /* Just in case, flush all crap out */
+ (void) tcflush(fd, TCIOFLUSH);
+
+ return r;
+}
+
+/* Opens the terminal device name and resets it with reset_terminal_fd(), including the switch back
+ * to text mode. Returns 0 on success, a negative errno-style value on failure. */
+int reset_terminal(const char *name) {
+        _cleanup_close_ int fd = -1;
+
+        /* We open the terminal with O_NONBLOCK here, to ensure we
+         * don't block on carrier if this is a terminal with carrier
+         * configured. */
+
+        fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+
+        return fd < 0 ? fd : reset_terminal_fd(fd, true);
+}
+
+/* Opens the terminal device name with the open() flags in mode (O_CREAT is refused with -EINVAL)
+ * and verifies that the result really is a TTY.
+ *
+ * Returns the file descriptor on success, -ENOTTY if the opened file is not a terminal, or -errno
+ * if open() failed. */
+int open_terminal(const char *name, int mode) {
+        unsigned attempt;
+        int fd;
+
+        /*
+         * If a TTY is in the process of being closed opening it might
+         * cause EIO. This is horribly awful, but unlikely to be
+         * changed in the kernel. Hence we work around this problem by
+         * retrying a couple of times.
+         *
+         * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/554172/comments/245
+         */
+
+        if (mode & O_CREAT)
+                return -EINVAL;
+
+        for (attempt = 0;; attempt++) {
+                fd = open(name, mode, 0);
+                if (fd >= 0)
+                        break;
+
+                /* Give up right away on anything but EIO, and on EIO after 20 retries of 50ms
+                 * each, i.e. max 1s in total */
+                if (errno != EIO || attempt >= 20)
+                        return -errno;
+
+                usleep(50 * USEC_PER_MSEC);
+        }
+
+        if (isatty(fd) <= 0) {
+                safe_close(fd);
+                return -ENOTTY;
+        }
+
+        return fd;
+}
+
+int acquire_terminal(
+                const char *name,
+                AcquireTerminalFlags flags,
+                usec_t timeout) {
+
+        _cleanup_close_ int notify = -1, fd = -1;
+        usec_t ts = USEC_INFINITY;
+        int r, wd = -1;
+
+        assert(name);
+        assert(IN_SET(flags & ~ACQUIRE_TERMINAL_PERMISSIVE, ACQUIRE_TERMINAL_TRY, ACQUIRE_TERMINAL_FORCE, ACQUIRE_TERMINAL_WAIT));
+
+        /* Opens the terminal 'name' and tries to become its controlling process via TIOCSCTTY. On success the
+         * open fd is returned and owned by the caller. On failure a negative errno-style value is returned;
+         * -ETIMEDOUT if 'timeout' (only honoured in ACQUIRE_TERMINAL_WAIT mode, USEC_INFINITY to disable)
+         * elapsed first.
+         *
+         * We use inotify to be notified when the tty is closed. We create the watch before checking if we can actually
+         * acquire it, so that we don't lose any event.
+         *
+         * Note: strictly speaking this actually watches for the device being closed, it does *not* really watch
+         * whether a tty loses its controlling process. However, unless some rogue process uses TIOCNOTTY on /dev/tty
+         * *after* closing its tty otherwise this will not become a problem. As long as the administrator makes sure to
+         * not configure any service on the same tty as an untrusted user this should not be a problem. (Which they
+         * probably should not do anyway.) */
+
+        if ((flags & ~ACQUIRE_TERMINAL_PERMISSIVE) == ACQUIRE_TERMINAL_WAIT) {
+                notify = inotify_init1(IN_CLOEXEC | (timeout != USEC_INFINITY ? IN_NONBLOCK : 0));
+                if (notify < 0)
+                        return -errno;
+
+                wd = inotify_add_watch(notify, name, IN_CLOSE);
+                if (wd < 0)
+                        return -errno;
+
+                if (timeout != USEC_INFINITY)
+                        ts = now(CLOCK_MONOTONIC);
+        }
+
+        for (;;) {
+                struct sigaction sa_old, sa_new = {
+                        .sa_handler = SIG_IGN,
+                        .sa_flags = SA_RESTART,
+                };
+
+                if (notify >= 0) {
+                        r = flush_fd(notify);
+                        if (r < 0)
+                                return r;
+                }
+
+                /* We pass here O_NOCTTY only so that we can check the return value TIOCSCTTY and have a reliable way
+                 * to figure out if we successfully became the controlling process of the tty */
+                fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC);
+                if (fd < 0)
+                        return fd;
+
+                /* Temporarily ignore SIGHUP, so that we don't get SIGHUP'ed if we already own the tty. */
+                assert_se(sigaction(SIGHUP, &sa_new, &sa_old) == 0);
+
+                /* First, try to get the tty */
+                r = ioctl(fd, TIOCSCTTY,
+                          (flags & ~ACQUIRE_TERMINAL_PERMISSIVE) == ACQUIRE_TERMINAL_FORCE) < 0 ? -errno : 0;
+
+                /* Reset signal handler to old value */
+                assert_se(sigaction(SIGHUP, &sa_old, NULL) == 0);
+
+                /* Success? Exit the loop now! */
+                if (r >= 0)
+                        break;
+
+                /* Any failure besides -EPERM? Fail, regardless of the mode. */
+                if (r != -EPERM)
+                        return r;
+
+                if (flags & ACQUIRE_TERMINAL_PERMISSIVE) /* If we are in permissive mode, then EPERM is fine, turn this
+                                                          * into a success. Note that EPERM is also returned if we
+                                                          * already are the owner of the TTY. */
+                        break;
+
+                if (flags != ACQUIRE_TERMINAL_WAIT) /* If we are in TRY or FORCE mode, then propagate EPERM as EPERM */
+                        return r;
+
+                assert(notify >= 0);
+                assert(wd >= 0);
+
+                for (;;) {
+                        union inotify_event_buffer buffer;
+                        struct inotify_event *e;
+                        ssize_t l;
+
+                        if (timeout != USEC_INFINITY) {
+                                usec_t elapsed;
+
+                                assert(ts != USEC_INFINITY);
+
+                                /* Compare the elapsed time against the timeout rather than computing the absolute
+                                 * deadline "ts + timeout": the sum may wrap around for very large (but finite)
+                                 * timeouts, which would make us time out right away. CLOCK_MONOTONIC never goes
+                                 * backwards, hence the subtraction cannot underflow. */
+                                elapsed = now(CLOCK_MONOTONIC) - ts;
+                                if (elapsed > timeout)
+                                        return -ETIMEDOUT;
+
+                                r = fd_wait_for_event(notify, POLLIN, timeout - elapsed);
+                                if (r < 0)
+                                        return r;
+                                if (r == 0)
+                                        return -ETIMEDOUT;
+                        }
+
+                        l = read(notify, &buffer, sizeof(buffer));
+                        if (l < 0) {
+                                if (IN_SET(errno, EINTR, EAGAIN))
+                                        continue;
+
+                                return -errno;
+                        }
+
+                        FOREACH_INOTIFY_EVENT(e, buffer, l) {
+                                if (e->mask & IN_Q_OVERFLOW) /* If we hit an inotify queue overflow, simply check if the terminal is up for grabs now. */
+                                        break;
+
+                                if (e->wd != wd || !(e->mask & IN_CLOSE)) /* Safety checks */
+                                        return -EIO;
+                        }
+
+                        break;
+                }
+
+                /* We close the tty fd here since if the old session ended our handle will be dead. It's important that
+                 * we do this after sleeping, so that we don't enter an endless loop. */
+                fd = safe_close(fd);
+        }
+
+        return TAKE_FD(fd);
+}
+
+int release_terminal(void) {
+        static const struct sigaction ignore_hup = {
+                .sa_handler = SIG_IGN,
+                .sa_flags = SA_RESTART,
+        };
+
+        _cleanup_close_ int fd = -1;
+        struct sigaction saved;
+        int r = 0;
+
+        /* Issues TIOCNOTTY on /dev/tty. Returns 0 on success and a negative errno on failure. */
+
+        fd = open("/dev/tty", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+        if (fd < 0)
+                return -errno;
+
+        /* Our own TIOCNOTTY would get us SIGHUP'ed, hence ignore SIGHUP while we issue it */
+        assert_se(sigaction(SIGHUP, &ignore_hup, &saved) == 0);
+
+        if (ioctl(fd, TIOCNOTTY) < 0)
+                r = -errno;
+
+        /* Put the previous SIGHUP disposition back in place */
+        assert_se(sigaction(SIGHUP, &saved, NULL) == 0);
+
+        return r;
+}
+
+int terminal_vhangup_fd(int fd) {
+        assert(fd >= 0);
+
+        /* Issues TIOCVHANGUP on the fd: 0 on success, negative errno on failure */
+        return ioctl(fd, TIOCVHANGUP) < 0 ? -errno : 0;
+}
+
+int terminal_vhangup(const char *name) {
+        /* Always initialize fds carrying a cleanup handler, like everywhere else in this file, so that
+         * the handler can never run on an indeterminate value should an early return be added later. */
+        _cleanup_close_ int fd = -1;
+
+        /* Opens the terminal by name (non-blocking, without making it our controlling TTY) and issues
+         * a virtual hangup on it. Returns 0 on success, negative errno on failure. */
+
+        fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+        if (fd < 0)
+                return fd;
+
+        return terminal_vhangup_fd(fd);
+}
+
+int vt_disallocate(const char *name) {
+        _cleanup_close_ int fd = -1;
+        const char *e, *n;
+        unsigned u;
+        int r;
+
+        /* Deallocate the VT if possible. If not possible
+         * (i.e. because it is the active one), at least clear it
+         * entirely (including the scrollback buffer).
+         *
+         * 'name' must be a path below /dev/. Returns 0 on success (clearing the screen is best-effort
+         * and its failure is not reported), negative errno otherwise. */
+
+        e = path_startswith(name, "/dev/");
+        if (!e)
+                return -EINVAL;
+
+        if (!tty_is_vc(name)) {
+                /* So this is not a VT. I guess we cannot deallocate
+                 * it then. But let's at least clear the screen */
+
+                fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC);
+                if (fd < 0)
+                        return fd;
+
+                (void) loop_write(fd,
+                                  "\033[r"    /* clear scrolling region */
+                                  "\033[H"    /* move home */
+                                  "\033[2J",  /* clear screen */
+                                  10, false);
+                return 0;
+        }
+
+        n = startswith(e, "tty");
+        if (!n)
+                return -EINVAL;
+
+        r = safe_atou(n, &u);
+        if (r < 0)
+                return r;
+
+        if (u <= 0)
+                return -EINVAL;
+
+        /* Try to deallocate */
+        fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+        if (fd < 0)
+                return fd;
+
+        /* Pick up errno right away, so that the result does not depend on the close below leaving
+         * errno untouched */
+        r = ioctl(fd, VT_DISALLOCATE, u) < 0 ? -errno : 0;
+        fd = safe_close(fd);
+
+        if (r >= 0)
+                return 0;
+
+        if (r != -EBUSY)
+                return r;
+
+        /* Couldn't deallocate, so let's clear it fully with
+         * scrollback */
+        fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC);
+        if (fd < 0)
+                return fd;
+
+        (void) loop_write(fd,
+                          "\033[r"   /* clear scrolling region */
+                          "\033[H"   /* move home */
+                          "\033[3J", /* clear screen including scrollback, requires Linux 2.6.40 */
+                          10, false);
+        return 0;
+}
+
+int make_console_stdio(void) {
+        int console_fd, r;
+
+        /* Turns /dev/console into our controlling terminal and into stdin/stdout/stderr. Returns 0 on
+         * success, and a negative errno (after logging) on failure. */
+
+        console_fd = acquire_terminal("/dev/console", ACQUIRE_TERMINAL_FORCE|ACQUIRE_TERMINAL_PERMISSIVE, USEC_INFINITY);
+        if (console_fd < 0)
+                return log_error_errno(console_fd, "Failed to acquire terminal: %m");
+
+        /* Failing to reset the terminal only warrants a warning */
+        r = reset_terminal_fd(console_fd, true);
+        if (r < 0)
+                log_warning_errno(r, "Failed to reset terminal, ignoring: %m");
+
+        /* Note that 'console_fd' is invalidated by this call, both on success and on failure */
+        r = rearrange_stdio(console_fd, console_fd, console_fd);
+        if (r < 0)
+                return log_error_errno(r, "Failed to make terminal stdin/stdout/stderr: %m");
+
+        /* Drop everything we cached about the terminal now that stdio was replaced */
+        reset_terminal_feature_caches();
+
+        return 0;
+}
+
+bool tty_is_vc(const char *tty) {
+        int vtnr;
+
+        assert(tty);
+
+        /* A name counts as virtual console iff a VT number can be derived from it */
+        vtnr = vtnr_from_tty(tty);
+        return vtnr >= 0;
+}
+
+bool tty_is_console(const char *tty) {
+        const char *base;
+
+        assert(tty);
+
+        /* True if the name, after skip_dev_prefix(), is exactly "console" */
+        base = skip_dev_prefix(tty);
+        return streq(base, "console");
+}
+
+int vtnr_from_tty(const char *tty) {
+        const char *number;
+        int vtnr, r;
+
+        assert(tty);
+
+        /* Extracts the number from a "tty<N>" name. Returns N (0…63) on success, -EINVAL if the name
+         * does not have that form or N is out of range, or the parse error of safe_atoi(). */
+
+        number = startswith(skip_dev_prefix(tty), "tty");
+        if (!number)
+                return -EINVAL;
+
+        /* Insist on a digit right after the prefix before handing the rest to the parser */
+        if (!(number[0] >= '0' && number[0] <= '9'))
+                return -EINVAL;
+
+        r = safe_atoi(number, &vtnr);
+        if (r < 0)
+                return r;
+
+        if (vtnr < 0 || vtnr > 63)
+                return -EINVAL;
+
+        return vtnr;
+}
+
+int resolve_dev_console(char **ret) {
+        _cleanup_free_ char *active = NULL;
+        char *tty, *copy;
+        int r;
+
+        assert(ret);
+
+        /* Resolve where /dev/console is pointing to, if /sys is actually ours (i.e. not read-only-mounted which is a
+         * sign for container setups). On success 0 is returned and *ret is set to a newly allocated device
+         * name, which the caller has to free. */
+
+        if (path_is_read_only_fs("/sys") > 0)
+                return -ENOMEDIUM;
+
+        r = read_one_line_file("/sys/class/tty/console/active", &active);
+        if (r < 0)
+                return r;
+
+        /* If multiple log outputs are configured the last one is what /dev/console points to */
+        tty = strrchr(active, ' ');
+        tty = tty ? tty + 1 : active;
+
+        if (streq(tty, "tty0")) {
+                active = mfree(active);
+
+                /* Get the active VC (e.g. tty1) */
+                r = read_one_line_file("/sys/class/tty/tty0/active", &active);
+                if (r < 0)
+                        return r;
+
+                tty = active;
+        }
+
+        if (tty == active) {
+                /* The whole buffer is the result, simply pass ownership to the caller */
+                *ret = TAKE_PTR(active);
+                return 0;
+        }
+
+        /* The result is only the tail of the buffer, hence return a copy of that part */
+        copy = strdup(tty);
+        if (!copy)
+                return -ENOMEM;
+
+        *ret = copy;
+        return 0;
+}
+
+int get_kernel_consoles(char ***ret) {
+        _cleanup_strv_free_ char **l = NULL;
+        _cleanup_free_ char *line = NULL;
+        const char *p;
+        int r;
+
+        assert(ret);
+
+        /* Returns in *ret a string list with the /dev/ paths of the consoles listed in
+         * /sys/class/tty/console/active that are accessible, or just "/dev/console" if there are none
+         * or /sys cannot be trusted. Returns 0 on success, negative errno on failure. */
+
+        /* If /sys is mounted read-only this means we are running in some kind of container environment. In that
+         * case /sys would reflect the host system, not us, hence ignore the data we can read from it. */
+        if (path_is_read_only_fs("/sys") > 0)
+                goto fallback;
+
+        r = read_one_line_file("/sys/class/tty/console/active", &line);
+        if (r < 0)
+                return r;
+
+        for (p = line;;) {
+                _cleanup_free_ char *tty = NULL, *path = NULL;
+
+                r = extract_first_word(&p, &tty, NULL, 0);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                /* "tty0" is replaced by what /sys/class/tty/tty0/active currently names */
+                if (streq(tty, "tty0")) {
+                        tty = mfree(tty);
+                        r = read_one_line_file("/sys/class/tty/tty0/active", &tty);
+                        if (r < 0)
+                                return r;
+                }
+
+                path = strappend("/dev/", tty);
+                if (!path)
+                        return -ENOMEM;
+
+                if (access(path, F_OK) < 0) {
+                        log_debug_errno(errno, "Console device %s is not accessible, skipping: %m", path);
+                        continue;
+                }
+
+                /* The list takes possession of the string, whatever the outcome */
+                r = strv_consume(&l, TAKE_PTR(path));
+                if (r < 0)
+                        return r;
+        }
+
+        if (!strv_isempty(l)) {
+                *ret = TAKE_PTR(l);
+                return 0;
+        }
+
+        log_debug("No devices found for system console");
+
+fallback:
+        r = strv_extend(&l, "/dev/console");
+        if (r < 0)
+                return r;
+
+        *ret = TAKE_PTR(l);
+
+        return 0;
+}
+
+bool tty_is_vc_resolve(const char *tty) {
+        _cleanup_free_ char *resolved = NULL;
+        const char *name;
+
+        assert(tty);
+
+        /* Like tty_is_vc(), but "console" is first resolved to the device it currently points to */
+
+        name = skip_dev_prefix(tty);
+        if (!streq(name, "console"))
+                return tty_is_vc(name);
+
+        if (resolve_dev_console(&resolved) < 0)
+                return false;
+
+        return tty_is_vc(resolved);
+}
+
+const char *default_term_for_tty(const char *tty) {
+        /* "linux" for (resolved) virtual consoles, "vt220" for everything else, including a NULL name */
+        if (tty && tty_is_vc_resolve(tty))
+                return "linux";
+
+        return "vt220";
+}
+
+int fd_columns(int fd) {
+        struct winsize ws = {};
+
+        /* Queries the window size of the terminal behind 'fd' and returns its column count, -EIO if
+         * the reported count is zero, or the negated errno of the ioctl. */
+
+        if (ioctl(fd, TIOCGWINSZ, &ws) < 0)
+                return -errno;
+
+        return ws.ws_col > 0 ? ws.ws_col : -EIO;
+}
+
+unsigned columns(void) {
+        const char *env;
+        int width = 0;
+
+        /* Returns the terminal width, in order of preference: the cached value, $COLUMNS, the window
+         * size of stdout, and finally a default of 80. The result is cached. */
+
+        if (cached_columns > 0)
+                return cached_columns;
+
+        env = getenv("COLUMNS");
+        if (env)
+                (void) safe_atoi(env, &width);
+
+        /* Missing, unparsable or implausible $COLUMNS? Then ask the terminal. */
+        if (!(width > 0 && width <= USHRT_MAX)) {
+                width = fd_columns(STDOUT_FILENO);
+                if (width <= 0)
+                        width = 80;
+        }
+
+        cached_columns = width;
+        return cached_columns;
+}
+
+int fd_lines(int fd) {
+        struct winsize ws = {};
+
+        /* Queries the window size of the terminal behind 'fd' and returns its row count, -EIO if the
+         * reported count is zero, or the negated errno of the ioctl. */
+
+        if (ioctl(fd, TIOCGWINSZ, &ws) < 0)
+                return -errno;
+
+        return ws.ws_row > 0 ? ws.ws_row : -EIO;
+}
+
+unsigned lines(void) {
+        const char *env;
+        int height = 0;
+
+        /* Returns the terminal height, in order of preference: the cached value, $LINES, the window
+         * size of stdout, and finally a default of 24. The result is cached. */
+
+        if (cached_lines > 0)
+                return cached_lines;
+
+        env = getenv("LINES");
+        if (env)
+                (void) safe_atoi(env, &height);
+
+        /* Missing, unparsable or implausible $LINES? Then ask the terminal. */
+        if (!(height > 0 && height <= USHRT_MAX)) {
+                height = fd_lines(STDOUT_FILENO);
+                if (height <= 0)
+                        height = 24;
+        }
+
+        cached_lines = height;
+        return cached_lines;
+}
+
+/* Drops the cached terminal width and height, so that the next columns()/lines() call determines them afresh. Intended to be used as a SIGWINCH sighandler; 'signum' is not looked at. */
+void columns_lines_cache_reset(int signum) {
+ cached_columns = 0;
+ cached_lines = 0;
+}
+
+void reset_terminal_feature_caches(void) { /* Invalidates every cached terminal property kept by this file */
+ cached_columns = 0; /* columns() recomputes unless the cache is > 0 */
+ cached_lines = 0; /* lines() recomputes unless the cache is > 0 */
+
+ cached_colors_enabled = -1; /* negative means "not determined yet", see colors_enabled() */
+ cached_underline_enabled = -1; /* likewise, see underline_enabled() */
+ cached_on_tty = -1; /* likewise, see on_tty() */
+}
+
+bool on_tty(void) {
+
+        /* Returns true only if both stdout and stderr are TTYs; the answer is cached. Checking both means
+         * that shell pipelines are reliably recognized, regardless if only the output or only the errors are
+         * piped some place. Since on_tty() is generally used to default to a safer, non-interactive,
+         * non-color mode of operation it's good to be defensive here. STDIN_FILENO is deliberately not
+         * checked, because it should be fine to use fancy terminal functionality when outputting stuff, even
+         * if the input is piped to us. */
+
+        if (cached_on_tty >= 0)
+                return cached_on_tty;
+
+        if (isatty(STDOUT_FILENO) <= 0)
+                cached_on_tty = false;
+        else
+                cached_on_tty = isatty(STDERR_FILENO) > 0;
+
+        return cached_on_tty;
+}
+
+int getttyname_malloc(int fd, char **ret) {
+        size_t l = 100;
+        int r;
+
+        assert(fd >= 0);
+        assert(ret);
+
+        /* Determines the name of the terminal behind 'fd' via ttyname_r() and returns a newly allocated
+         * copy of it, after skip_dev_prefix(), in *ret. Returns 0 on success, -ENOMEM if the copy cannot
+         * be allocated, or the negated error code of ttyname_r(). */
+
+        for (;;) {
+                char path[l];
+
+                r = ttyname_r(fd, path, sizeof(path));
+                if (r == 0) {
+                        char *c;
+
+                        c = strdup(skip_dev_prefix(path));
+                        if (!c)
+                                return -ENOMEM;
+
+                        *ret = c;
+                        return 0;
+                }
+
+                /* ttyname_r() returns the error number directly; only a too small buffer is retried */
+                if (r != ERANGE)
+                        return -r;
+
+                l *= 2;
+        }
+
+        /* Not reached: the loop above can only be left through one of its return statements */
+}
+
+int getttyname_harder(int fd, char **r) {
+        _cleanup_free_ char *name = NULL;
+        int k;
+
+        /* Like getttyname_malloc(), but if the name comes back as plain "tty" the controlling terminal
+         * is looked up via get_ctty() instead. */
+
+        k = getttyname_malloc(fd, &name);
+        if (k < 0)
+                return k;
+
+        if (streq(name, "tty"))
+                return get_ctty(0, NULL, r);
+
+        *r = TAKE_PTR(name);
+        return 0;
+}
+
+int get_ctty_devnr(pid_t pid, dev_t *d) {
+        _cleanup_free_ char *stat_line = NULL;
+        const char *path, *fields;
+        unsigned long ttynr;
+        int r;
+
+        assert(pid >= 0);
+
+        /* Reads the tty number field from the "stat" file of the process in procfs. Returns 0 and
+         * stores it in *d (if d is non-NULL), -EIO if the line cannot be parsed, and -ENXIO if the
+         * number is 0:0 (presumably: the process has no controlling terminal). */
+
+        path = procfs_file_alloca(pid, "stat");
+        r = read_one_line_file(path, &stat_line);
+        if (r < 0)
+                return r;
+
+        /* Continue parsing right after the last ')' on the line */
+        fields = strrchr(stat_line, ')');
+        if (!fields)
+                return -EIO;
+
+        fields++;
+
+        if (sscanf(fields, " "
+                   "%*c "  /* state */
+                   "%*d "  /* ppid */
+                   "%*d "  /* pgrp */
+                   "%*d "  /* session */
+                   "%lu ", /* ttynr */
+                   &ttynr) != 1)
+                return -EIO;
+
+        if (major(ttynr) == 0 && minor(ttynr) == 0)
+                return -ENXIO;
+
+        if (d)
+                *d = (dev_t) ttynr;
+
+        return 0;
+}
+
+int get_ctty(pid_t pid, dev_t *ret_devnr, char **ret) {
+        _cleanup_free_ char *fn = NULL, *b = NULL;
+        dev_t devnr;
+        int r;
+
+        /* Determines the controlling terminal of the process: its device number is stored in
+         * *ret_devnr and a newly allocated name (with any "/dev/" prefix removed) in *ret. Either
+         * pointer may be NULL. Returns 0 on success, negative errno on failure. */
+
+        r = get_ctty_devnr(pid, &devnr);
+        if (r < 0)
+                return r;
+
+        r = device_path_make_canonical(S_IFCHR, devnr, &fn);
+        if (r < 0 && r != -ENOENT)
+                return r;
+
+        if (r < 0) { /* -ENOENT: no symlink for this in /dev/char/? */
+
+                if (major(devnr) == 136) {
+                        /* This is an ugly hack: PTY devices are not listed in /dev/char/, as they don't follow the
+                         * Linux device model. This means we have no nice way to match them up against their actual
+                         * device node. Let's hence do the check by the fixed, assigned major number. Normally we try
+                         * to avoid such fixed major/minor matches, but there appears to be no other nice way to
+                         * handle this. */
+
+                        if (asprintf(&b, "pts/%u", minor(devnr)) < 0)
+                                return -ENOMEM;
+                } else {
+                        /* Probably something similar to the ptys which have no symlink in /dev/char/. Let's return
+                         * something vaguely useful. */
+
+                        r = device_path_make_major_minor(S_IFCHR, devnr, &fn);
+                        if (r < 0)
+                                return r;
+                }
+        }
+
+        if (!b) {
+                const char *suffix;
+
+                /* Return the path relative to /dev/ if it is located there, the full path otherwise */
+                suffix = path_startswith(fn, "/dev/");
+                if (!suffix)
+                        b = TAKE_PTR(fn);
+                else {
+                        b = strdup(suffix);
+                        if (!b)
+                                return -ENOMEM;
+                }
+        }
+
+        if (ret)
+                *ret = TAKE_PTR(b);
+
+        if (ret_devnr)
+                *ret_devnr = devnr;
+
+        return 0;
+}
+
+int ptsname_malloc(int fd, char **ret) {
+        size_t l = 100;
+
+        assert(fd >= 0);
+        assert(ret);
+
+        /* Like ptsname_r(), but returns the name in a newly allocated buffer in *ret, growing the buffer
+         * until the name fits. Returns 0 on success, -ENOMEM on allocation failure, or the negated errno
+         * left behind by ptsname_r(). */
+
+        for (;;) {
+                /* Released automatically whenever we leave the iteration. This way "-errno" below is
+                 * evaluated before the buffer is freed, and cannot be affected by what free() does to
+                 * errno. */
+                _cleanup_free_ char *c = NULL;
+
+                c = new(char, l);
+                if (!c)
+                        return -ENOMEM;
+
+                if (ptsname_r(fd, c, l) == 0) {
+                        *ret = TAKE_PTR(c);
+                        return 0;
+                }
+
+                /* Only a too small buffer is worth another try */
+                if (errno != ERANGE)
+                        return -errno;
+
+                l *= 2;
+        }
+}
+
+int ptsname_namespace(int pty, char **ret) {
+        int no = -1;
+
+        /* Like ptsname(), but doesn't assume that the path is
+         * accessible in the local namespace: the name is built from the PTY number the kernel reports.
+         * Returns 0 and a newly allocated string in *ret on success, negative errno on failure. */
+
+        if (ioctl(pty, TIOCGPTN, &no) < 0)
+                return -errno;
+
+        if (no < 0)
+                return -EIO;
+
+        return asprintf(ret, "/dev/pts/%i", no) < 0 ? -ENOMEM : 0;
+}
+
+int openpt_in_namespace(pid_t pid, int flags) {
+        _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1;
+        _cleanup_close_pair_ int pair[2] = { -1, -1 };
+        pid_t child;
+        int r;
+
+        assert(pid > 0);
+
+        /* Allocates a PTY master from a child process forked into the namespaces of 'pid' and passes
+         * the fd back to us over a socket pair. Returns the fd on success, negative errno on failure. */
+
+        r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd);
+        if (r < 0)
+                return r;
+
+        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+                return -errno;
+
+        r = namespace_fork("(sd-openptns)", "(sd-openpt)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+                           pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child);
+        if (r < 0)
+                return r;
+        if (r == 0) {
+                int master;
+
+                /* Child: keep only the sending end of the pair */
+                pair[0] = safe_close(pair[0]);
+
+                master = posix_openpt(flags|O_NOCTTY|O_CLOEXEC);
+
+                /* Any failing step ends the child with a failure exit status */
+                if (master < 0 ||
+                    unlockpt(master) < 0 ||
+                    send_one_fd(pair[1], master, 0) < 0)
+                        _exit(EXIT_FAILURE);
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        /* Parent: keep only the receiving end of the pair */
+        pair[1] = safe_close(pair[1]);
+
+        r = wait_for_terminate_and_check("(sd-openptns)", child, 0);
+        if (r < 0)
+                return r;
+        if (r != EXIT_SUCCESS)
+                return -EIO;
+
+        return receive_one_fd(pair[0], 0);
+}
+
+int open_terminal_in_namespace(pid_t pid, const char *name, int mode) {
+        _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1;
+        _cleanup_close_pair_ int pair[2] = { -1, -1 };
+        pid_t child;
+        int r;
+
+        /* Opens the terminal 'name' from a child process forked into the namespaces of 'pid' and passes
+         * the fd back to us over a socket pair. Returns the fd on success, negative errno on failure. */
+
+        r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd);
+        if (r < 0)
+                return r;
+
+        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+                return -errno;
+
+        r = namespace_fork("(sd-terminalns)", "(sd-terminal)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+                           pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child);
+        if (r < 0)
+                return r;
+        if (r == 0) {
+                int master;
+
+                /* Child: keep only the sending end of the pair */
+                pair[0] = safe_close(pair[0]);
+
+                master = open_terminal(name, mode|O_NOCTTY|O_CLOEXEC);
+
+                /* Any failing step ends the child with a failure exit status */
+                if (master < 0 ||
+                    send_one_fd(pair[1], master, 0) < 0)
+                        _exit(EXIT_FAILURE);
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        /* Parent: keep only the receiving end of the pair */
+        pair[1] = safe_close(pair[1]);
+
+        r = wait_for_terminate_and_check("(sd-terminalns)", child, 0);
+        if (r < 0)
+                return r;
+        if (r != EXIT_SUCCESS)
+                return -EIO;
+
+        return receive_one_fd(pair[0], 0);
+}
+
+static bool getenv_terminal_is_dumb(void) {
+        const char *term;
+
+        /* An unset $TERM is treated like TERM=dumb */
+        term = getenv("TERM");
+
+        return !term || streq(term, "dumb");
+}
+
+bool terminal_is_dumb(void) {
+        /* Not being on a TTY at all counts as dumb, too; $TERM is only consulted if we are on one */
+        return !on_tty() || getenv_terminal_is_dumb();
+}
+
+bool colors_enabled(void) {
+        int val;
+
+        /* Returns true if colors are considered supported on our stdout; the answer is cached. For that we
+         * check $SYSTEMD_COLORS first (which is the explicit way to turn colors on/off). If that didn't work
+         * we turn colors off unless we are on a TTY. And if we are on a TTY we turn it off if $TERM is set to
+         * "dumb". There's one special tweak though: if we are PID 1 then we do not check whether we are
+         * connected to a TTY, because we don't keep /dev/console open continuously due to fear of SAK, and
+         * hence things are a bit weird. */
+
+        if (cached_colors_enabled >= 0)
+                return cached_colors_enabled;
+
+        val = getenv_bool("SYSTEMD_COLORS");
+        if (val >= 0)
+                cached_colors_enabled = val;
+        else if (getpid_cached() == 1)
+                /* PID1 outputs to the console without holding it open all the time */
+                cached_colors_enabled = !getenv_terminal_is_dumb();
+        else
+                cached_colors_enabled = !terminal_is_dumb();
+
+        return cached_colors_enabled;
+}
+
+bool dev_console_colors_enabled(void) {
+        _cleanup_free_ char *term = NULL;
+        int setting, r;
+
+        /* Returns true if we assume that color is supported on /dev/console.
+         *
+         * An explicit $SYSTEMD_COLORS setting always wins. Without it we look for $TERM of PID 1, and if
+         * that is not available for TERM on the kernel command line. Color is assumed unless the value
+         * found is "dumb", similarly to how regular colors_enabled() operates. */
+
+        setting = getenv_bool("SYSTEMD_COLORS");
+        if (setting >= 0)
+                return setting;
+
+        r = getenv_for_pid(1, "TERM", &term);
+        if (r <= 0)
+                (void) proc_cmdline_get_key("TERM", 0, &term);
+
+        return !streq_ptr(term, "dumb");
+}
+
+bool underline_enabled(void) {
+
+        /* Underlining requires colors to be enabled. The Linux console doesn't support underlining, hence
+         * it is turned off there, but only there. The answer is cached. */
+
+        if (cached_underline_enabled < 0)
+                cached_underline_enabled = colors_enabled() && !streq_ptr(getenv("TERM"), "linux");
+
+        return cached_underline_enabled;
+}
+
+int vt_default_utf8(void) {
+        _cleanup_free_ char *value = NULL;
+        int r;
+
+        /* Reads the kernel's default VT UTF8 setting. Returns the result of parse_boolean() on the
+         * value read, or the error from reading the file. */
+
+        r = read_one_line_file("/sys/module/vt/parameters/default_utf8", &value);
+
+        return r < 0 ? r : parse_boolean(value);
+}
+
+int vt_reset_keyboard(int fd) {
+        int kb = K_XLATE;
+
+        /* Puts the keyboard of the VT into its default mode. Anything but an explicit "off" selects
+         * unicode, i.e. if we can't read the default we default to unicode, too. It's 2017 after all. */
+        if (vt_default_utf8() != 0)
+                kb = K_UNICODE;
+
+        return ioctl(fd, KDSKBMODE, kb) < 0 ? -errno : 0;
+}
+
+int vt_restore(int fd) {
+        static const struct vt_mode mode = {
+                .mode = VT_AUTO,
+        };
+        int r, q = 0;
+
+        /* Puts the VT back into text mode, the default keyboard mode and automatic VT switching, and
+         * hands the device node back to UID 0. All steps are attempted even if an earlier one fails;
+         * the return value is 0 if all succeeded, and otherwise the error of the first step that
+         * failed.
+         *
+         * The error is always taken from the return value of log_debug_errno(), never from errno
+         * after the logging call, so that the result does not depend on logging leaving errno alone. */
+
+        if (ioctl(fd, KDSETMODE, KD_TEXT) < 0)
+                q = log_debug_errno(errno, "Failed to set VT in text mode, ignoring: %m");
+
+        r = vt_reset_keyboard(fd);
+        if (r < 0) {
+                log_debug_errno(r, "Failed to reset keyboard mode, ignoring: %m");
+                if (q >= 0)
+                        q = r;
+        }
+
+        if (ioctl(fd, VT_SETMODE, &mode) < 0) {
+                r = log_debug_errno(errno, "Failed to set VT_AUTO mode, ignoring: %m");
+                if (q >= 0)
+                        q = r;
+        }
+
+        if (fchown(fd, 0, (gid_t) -1) < 0) {
+                r = log_debug_errno(errno, "Failed to chown VT, ignoring: %m");
+                if (q >= 0)
+                        q = r;
+        }
+
+        return q;
+}
+
+int vt_release(int fd, bool restore) {
+        assert(fd >= 0);
+
+        /* Releases the VT by acknowledging the VT-switch signal sent by the kernel. If 'restore' is
+         * set the VT is afterwards also reset to text and auto VT-switching modes. Returns 0 on
+         * success, negative errno on failure. */
+
+        if (ioctl(fd, VT_RELDISP, 1) < 0)
+                return -errno;
+
+        return restore ? vt_restore(fd) : 0;
+}
diff --git a/src/basic/terminal-util.h b/src/basic/terminal-util.h
new file mode 100644
index 0000000..c885e0a
--- /dev/null
+++ b/src/basic/terminal-util.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "time-util.h"
+
+/* Regular colors. Like all color sequences below these start with attribute 0, i.e. they reset any previously set attributes first. */
+#define ANSI_BLACK "\x1B[0;30m"
+#define ANSI_RED "\x1B[0;31m"
+#define ANSI_GREEN "\x1B[0;32m"
+#define ANSI_YELLOW "\x1B[0;33m"
+#define ANSI_BLUE "\x1B[0;34m"
+#define ANSI_MAGENTA "\x1B[0;35m"
+#define ANSI_CYAN "\x1B[0;36m"
+#define ANSI_WHITE "\x1B[0;37m"
+#define ANSI_GREY "\x1B[0;2;37m"
+
+/* Bold/highlighted variants of the regular colors */
+#define ANSI_HIGHLIGHT_BLACK "\x1B[0;1;30m"
+#define ANSI_HIGHLIGHT_RED "\x1B[0;1;31m"
+#define ANSI_HIGHLIGHT_GREEN "\x1B[0;1;32m"
+#define ANSI_HIGHLIGHT_YELLOW "\x1B[0;1;33m"
+#define ANSI_HIGHLIGHT_BLUE "\x1B[0;1;34m"
+#define ANSI_HIGHLIGHT_MAGENTA "\x1B[0;1;35m"
+#define ANSI_HIGHLIGHT_CYAN "\x1B[0;1;36m"
+#define ANSI_HIGHLIGHT_WHITE "\x1B[0;1;37m"
+
+/* Bold/highlighted and underlined variants of the regular colors */
+#define ANSI_HIGHLIGHT_BLACK_UNDERLINE "\x1B[0;1;4;30m"
+#define ANSI_HIGHLIGHT_RED_UNDERLINE "\x1B[0;1;4;31m"
+#define ANSI_HIGHLIGHT_GREEN_UNDERLINE "\x1B[0;1;4;32m"
+#define ANSI_HIGHLIGHT_YELLOW_UNDERLINE "\x1B[0;1;4;33m"
+#define ANSI_HIGHLIGHT_BLUE_UNDERLINE "\x1B[0;1;4;34m"
+#define ANSI_HIGHLIGHT_MAGENTA_UNDERLINE "\x1B[0;1;4;35m"
+#define ANSI_HIGHLIGHT_CYAN_UNDERLINE "\x1B[0;1;4;36m"
+#define ANSI_HIGHLIGHT_WHITE_UNDERLINE "\x1B[0;1;4;37m"
+
+/* Other ANSI codes: underline and highlight that are not tied to one of the colors above */
+#define ANSI_UNDERLINE "\x1B[0;4m"
+#define ANSI_HIGHLIGHT "\x1B[0;1;39m"
+#define ANSI_HIGHLIGHT_UNDERLINE "\x1B[0;1;4m"
+
+/* Reset/clear ANSI styles */
+#define ANSI_NORMAL "\x1B[0m"
+
+/* Erase characters until the end of the line */
+#define ANSI_ERASE_TO_END_OF_LINE "\x1B[K"
+
+/* Move cursor up one line */
+#define ANSI_REVERSE_LINEFEED "\x1BM"
+
+/* Set cursor to top left corner and clear screen */
+#define ANSI_HOME_CLEAR "\x1B[H\x1B[2J"
+
+int reset_terminal_fd(int fd, bool switch_to_text);
+int reset_terminal(const char *name);
+
+int open_terminal(const char *name, int mode);
+
+/* Flags for tweaking the way acquire_terminal() makes us the controlling process of a terminal. */
+typedef enum AcquireTerminalFlags {
+ /* Try to become the controlling process of the TTY. If we can't, return -EPERM. */
+ ACQUIRE_TERMINAL_TRY = 0,
+
+ /* Tell the kernel to forcibly make us the controlling process of the TTY. Returns -EPERM if the kernel doesn't allow that. */
+ ACQUIRE_TERMINAL_FORCE = 1,
+
+ /* If we can't become the controlling process of the TTY right away, then wait until we can. */
+ ACQUIRE_TERMINAL_WAIT = 2,
+
+ /* Pick exactly one of the above, and then OR this flag in to request permissive behaviour: if we can't become the controlling process, this is not treated as an error. */
+ ACQUIRE_TERMINAL_PERMISSIVE = 1 << 2,
+} AcquireTerminalFlags;
+
+int acquire_terminal(const char *name, AcquireTerminalFlags flags, usec_t timeout);
+int release_terminal(void);
+
+int terminal_vhangup_fd(int fd);
+int terminal_vhangup(const char *name);
+
+int chvt(int vt);
+
+int read_one_char(FILE *f, char *ret, usec_t timeout, bool *need_nl);
+int ask_char(char *ret, const char *replies, const char *text, ...) _printf_(3, 4);
+int ask_string(char **ret, const char *text, ...) _printf_(2, 3);
+
+int vt_disallocate(const char *name);
+
+int resolve_dev_console(char **ret);
+int get_kernel_consoles(char ***ret);
+bool tty_is_vc(const char *tty);
+bool tty_is_vc_resolve(const char *tty);
+bool tty_is_console(const char *tty) _pure_;
+int vtnr_from_tty(const char *tty);
+const char *default_term_for_tty(const char *tty);
+
+int make_console_stdio(void);
+
+int fd_columns(int fd);
+unsigned columns(void);
+int fd_lines(int fd);
+unsigned lines(void);
+
+void columns_lines_cache_reset(int _unused_ signum);
+void reset_terminal_feature_caches(void);
+
+bool on_tty(void);
+bool terminal_is_dumb(void);
+bool colors_enabled(void);
+bool underline_enabled(void);
+bool dev_console_colors_enabled(void);
+
+/* Defines an inline function ansi_<name>() that returns the ANSI_<NAME> sequence if colors are enabled,
+ * and the empty string otherwise. */
+#define DEFINE_ANSI_FUNC(name, NAME)                            \
+        static inline const char *ansi_##name(void) {           \
+                if (!colors_enabled())                          \
+                        return "";                              \
+                return ANSI_##NAME;                             \
+        }
+
+/* Defines an inline function ansi_<name>() that returns the ANSI_<NAME> sequence if underlining is
+ * enabled, falls back to ANSI_<REPLACEMENT> if only colors are enabled, and returns the empty string
+ * otherwise. */
+#define DEFINE_ANSI_FUNC_UNDERLINE(name, NAME, REPLACEMENT)     \
+        static inline const char *ansi_##name(void) {           \
+                if (underline_enabled())                        \
+                        return ANSI_##NAME;                     \
+                if (colors_enabled())                           \
+                        return ANSI_##REPLACEMENT;              \
+                return "";                                      \
+        }
+
+DEFINE_ANSI_FUNC(highlight, HIGHLIGHT);
+DEFINE_ANSI_FUNC(highlight_red, HIGHLIGHT_RED);
+DEFINE_ANSI_FUNC(highlight_green, HIGHLIGHT_GREEN);
+DEFINE_ANSI_FUNC(highlight_yellow, HIGHLIGHT_YELLOW);
+DEFINE_ANSI_FUNC(highlight_blue, HIGHLIGHT_BLUE);
+DEFINE_ANSI_FUNC(highlight_magenta, HIGHLIGHT_MAGENTA);
+DEFINE_ANSI_FUNC(normal, NORMAL);
+DEFINE_ANSI_FUNC(grey, GREY);
+
+DEFINE_ANSI_FUNC_UNDERLINE(underline, UNDERLINE, NORMAL);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_underline, HIGHLIGHT_UNDERLINE, HIGHLIGHT);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_red_underline, HIGHLIGHT_RED_UNDERLINE, HIGHLIGHT_RED);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_green_underline, HIGHLIGHT_GREEN_UNDERLINE, HIGHLIGHT_GREEN);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_yellow_underline, HIGHLIGHT_YELLOW_UNDERLINE, HIGHLIGHT_YELLOW);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_blue_underline, HIGHLIGHT_BLUE_UNDERLINE, HIGHLIGHT_BLUE);
+
+int get_ctty_devnr(pid_t pid, dev_t *d);
+int get_ctty(pid_t, dev_t *_devnr, char **r);
+
+int getttyname_malloc(int fd, char **r);
+int getttyname_harder(int fd, char **r);
+
+int ptsname_malloc(int fd, char **ret);
+int ptsname_namespace(int pty, char **ret);
+
+int openpt_in_namespace(pid_t pid, int flags);
+int open_terminal_in_namespace(pid_t pid, const char *name, int mode);
+
+int vt_default_utf8(void);
+int vt_reset_keyboard(int fd);
+int vt_restore(int fd);
+int vt_release(int fd, bool restore_vt);
diff --git a/src/basic/time-util.c b/src/basic/time-util.c
new file mode 100644
index 0000000..62cdc30
--- /dev/null
+++ b/src/basic/time-util.c
@@ -0,0 +1,1471 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/timerfd.h>
+#include <sys/timex.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_timerfd.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+/* Translates the two "ALARM" clock IDs to their plain counterparts; every other ID is returned unchanged. */
+static clockid_t map_clock_id(clockid_t c) {
+
+        /* Some more exotic archs (s390, ppc, …) lack the "ALARM" flavour of the clocks, so clock_gettime()
+         * fails for them. They are essentially the same as their non-ALARM counterparts (the only difference
+         * is when timers are set on them), hence map them accordingly so that the correct time can be read
+         * even on those archs. */
+
+        if (c == CLOCK_BOOTTIME_ALARM)
+                return CLOCK_BOOTTIME;
+
+        if (c == CLOCK_REALTIME_ALARM)
+                return CLOCK_REALTIME;
+
+        return c;
+}
+
+/* Reads the given clock (after map_clock_id() translation) and returns the reading converted by
+ * timespec_load(). Success of clock_gettime() is checked with assert_se(). */
+usec_t now(clockid_t clock_id) {
+        struct timespec current;
+        clockid_t effective;
+
+        effective = map_clock_id(clock_id);
+        assert_se(clock_gettime(effective, &current) == 0);
+
+        return timespec_load(&current);
+}
+
+/* Same as now(), but the reading is converted by timespec_load_nsec(). */
+nsec_t now_nsec(clockid_t clock_id) {
+        struct timespec current;
+        clockid_t effective;
+
+        effective = map_clock_id(clock_id);
+        assert_se(clock_gettime(effective, &current) == 0);
+
+        return timespec_load_nsec(&current);
+}
+
+/* Stores the current CLOCK_REALTIME and CLOCK_MONOTONIC readings in *ts and returns ts. */
+dual_timestamp* dual_timestamp_get(dual_timestamp *ts) {
+        usec_t wall, mono;
+
+        assert(ts);
+
+        /* Realtime is sampled first, monotonic second. */
+        wall = now(CLOCK_REALTIME);
+        mono = now(CLOCK_MONOTONIC);
+
+        ts->realtime = wall;
+        ts->monotonic = mono;
+        return ts;
+}
+
+/* Stores the current CLOCK_REALTIME and CLOCK_MONOTONIC readings in *ts; the boottime field receives the
+ * CLOCK_BOOTTIME reading if clock_boottime_supported(), USEC_INFINITY otherwise. Returns ts. */
+triple_timestamp* triple_timestamp_get(triple_timestamp *ts) {
+        assert(ts);
+
+        ts->realtime = now(CLOCK_REALTIME);
+        ts->monotonic = now(CLOCK_MONOTONIC);
+
+        if (clock_boottime_supported())
+                ts->boottime = now(CLOCK_BOOTTIME);
+        else
+                ts->boottime = USEC_INFINITY;
+
+        return ts;
+}
+
+/* Builds a realtime/monotonic pair from the realtime value u. For u == 0 or u == USEC_INFINITY both fields
+ * are set to u. Otherwise the monotonic field is the current monotonic time shifted back by the distance
+ * between the current realtime and u. Returns ts. */
+dual_timestamp* dual_timestamp_from_realtime(dual_timestamp *ts, usec_t u) {
+        int64_t age;
+
+        assert(ts);
+
+        ts->realtime = u;
+
+        if (u == 0 || u == USEC_INFINITY) {
+                ts->monotonic = u;
+                return ts;
+        }
+
+        /* Signed, since u may lie in the future. */
+        age = (int64_t) now(CLOCK_REALTIME) - (int64_t) u;
+        ts->monotonic = usec_sub_signed(now(CLOCK_MONOTONIC), age);
+
+        return ts;
+}
+
+/* Builds a realtime/monotonic/boottime triple from the realtime value u. For u == 0 or u == USEC_INFINITY
+ * all three fields are set to u. Otherwise monotonic and boottime are the current readings shifted back by
+ * the distance between the current realtime and u; boottime is USEC_INFINITY if the clock is unsupported.
+ * Returns ts. */
+triple_timestamp* triple_timestamp_from_realtime(triple_timestamp *ts, usec_t u) {
+        int64_t age;
+
+        assert(ts);
+
+        ts->realtime = u;
+
+        if (u == 0 || u == USEC_INFINITY) {
+                ts->monotonic = u;
+                ts->boottime = u;
+                return ts;
+        }
+
+        /* Signed, since u may lie in the future. */
+        age = (int64_t) now(CLOCK_REALTIME) - (int64_t) u;
+        ts->monotonic = usec_sub_signed(now(CLOCK_MONOTONIC), age);
+
+        if (clock_boottime_supported())
+                ts->boottime = usec_sub_signed(now(CLOCK_BOOTTIME), age);
+        else
+                ts->boottime = USEC_INFINITY;
+
+        return ts;
+}
+
+/* Builds a realtime/monotonic pair from the monotonic value u. USEC_INFINITY yields an all-infinite pair;
+ * otherwise the realtime field is the current realtime shifted back by the distance between the current
+ * monotonic time and u. Returns ts. */
+dual_timestamp* dual_timestamp_from_monotonic(dual_timestamp *ts, usec_t u) {
+        int64_t age;
+
+        assert(ts);
+
+        if (u == USEC_INFINITY) {
+                ts->realtime = USEC_INFINITY;
+                ts->monotonic = USEC_INFINITY;
+                return ts;
+        }
+
+        ts->monotonic = u;
+
+        /* Signed, since u may lie in the future. */
+        age = (int64_t) now(CLOCK_MONOTONIC) - (int64_t) u;
+        ts->realtime = usec_sub_signed(now(CLOCK_REALTIME), age);
+
+        return ts;
+}
+
+/* Converts a value u taken on clock_boottime_or_monotonic() into a realtime/monotonic pair: both current
+ * readings are shifted back by the distance between "now" on that clock and u. USEC_INFINITY yields an
+ * all-infinite pair. Returns ts. */
+dual_timestamp* dual_timestamp_from_boottime_or_monotonic(dual_timestamp *ts, usec_t u) {
+        int64_t delta;
+
+        /* Catch a NULL ts up front, exactly like the other dual_timestamp_from_*() helpers do. */
+        assert(ts);
+
+        if (u == USEC_INFINITY) {
+                ts->realtime = ts->monotonic = USEC_INFINITY;
+                return ts;
+        }
+
+        dual_timestamp_get(ts);
+
+        /* Signed, since u may lie in the future. */
+        delta = (int64_t) now(clock_boottime_or_monotonic()) - (int64_t) u;
+        ts->realtime = usec_sub_signed(ts->realtime, delta);
+        ts->monotonic = usec_sub_signed(ts->monotonic, delta);
+
+        return ts;
+}
+
+/* Returns the field of *ts that belongs to the given clock; the "ALARM" variants select the same field as
+ * their plain clocks. Any other clock ID yields USEC_INFINITY. */
+usec_t triple_timestamp_by_clock(triple_timestamp *ts, clockid_t clock) {
+
+        if (clock == CLOCK_REALTIME || clock == CLOCK_REALTIME_ALARM)
+                return ts->realtime;
+
+        if (clock == CLOCK_MONOTONIC)
+                return ts->monotonic;
+
+        if (clock == CLOCK_BOOTTIME || clock == CLOCK_BOOTTIME_ALARM)
+                return ts->boottime;
+
+        return USEC_INFINITY;
+}
+
+/* Converts a struct timespec to microseconds (the nanosecond part is rounded down). Returns USEC_INFINITY
+ * if either field is negative or if the result would not fit into 64 bits. */
+usec_t timespec_load(const struct timespec *ts) {
+        usec_t whole, fraction;
+
+        assert(ts);
+
+        if (ts->tv_sec < 0 || ts->tv_nsec < 0)
+                return USEC_INFINITY;
+
+        whole = (usec_t) ts->tv_sec;
+        fraction = (usec_t) ts->tv_nsec / NSEC_PER_USEC;
+
+        /* Overflow check for whole * USEC_PER_SEC + fraction */
+        if (whole > (UINT64_MAX - fraction) / USEC_PER_SEC)
+                return USEC_INFINITY;
+
+        return whole * USEC_PER_SEC + fraction;
+}
+
+/* Converts a struct timespec to nanoseconds. Returns NSEC_INFINITY if either field is negative or if the
+ * seconds value reaches the overflow limit. */
+nsec_t timespec_load_nsec(const struct timespec *ts) {
+        nsec_t whole, fraction;
+
+        assert(ts);
+
+        if (ts->tv_sec < 0 || ts->tv_nsec < 0)
+                return NSEC_INFINITY;
+
+        whole = (nsec_t) ts->tv_sec;
+        fraction = (nsec_t) ts->tv_nsec;
+
+        /* Overflow check for whole * NSEC_PER_SEC + fraction; note the limit itself is rejected, too. */
+        if (whole >= (UINT64_MAX - fraction) / NSEC_PER_SEC)
+                return NSEC_INFINITY;
+
+        return whole * NSEC_PER_SEC + fraction;
+}
+
+/* Splits u (microseconds) into *ts. If u is USEC_INFINITY or its seconds part is >= TIME_T_MAX both fields
+ * are set to -1 instead. Returns ts. */
+struct timespec *timespec_store(struct timespec *ts, usec_t u) {
+        usec_t whole;
+
+        assert(ts);
+
+        whole = u / USEC_PER_SEC;
+
+        if (u != USEC_INFINITY && whole < TIME_T_MAX) {
+                ts->tv_sec = (time_t) whole;
+                ts->tv_nsec = (long int) ((u % USEC_PER_SEC) * NSEC_PER_USEC);
+        } else {
+                /* Not representable */
+                ts->tv_sec = (time_t) -1;
+                ts->tv_nsec = (long) -1;
+        }
+
+        return ts;
+}
+
+/* Converts a struct timeval to microseconds. Returns USEC_INFINITY if either field is negative or if the
+ * result would not fit into 64 bits. */
+usec_t timeval_load(const struct timeval *tv) {
+        usec_t whole;
+
+        assert(tv);
+
+        if (tv->tv_sec < 0 || tv->tv_usec < 0)
+                return USEC_INFINITY;
+
+        whole = (usec_t) tv->tv_sec;
+
+        /* Overflow check for whole * USEC_PER_SEC + tv_usec */
+        if (whole > (UINT64_MAX - tv->tv_usec) / USEC_PER_SEC)
+                return USEC_INFINITY;
+
+        return whole * USEC_PER_SEC + (usec_t) tv->tv_usec;
+}
+
+/* Splits u (microseconds) into *tv. If u is USEC_INFINITY or its seconds part exceeds TIME_T_MAX both
+ * fields are set to -1 instead. Returns tv. */
+struct timeval *timeval_store(struct timeval *tv, usec_t u) {
+        usec_t whole;
+
+        assert(tv);
+
+        whole = u / USEC_PER_SEC;
+
+        if (u != USEC_INFINITY && whole <= TIME_T_MAX) {
+                tv->tv_sec = (time_t) whole;
+                tv->tv_usec = (suseconds_t) (u % USEC_PER_SEC);
+                return tv;
+        }
+
+        /* Not representable */
+        tv->tv_sec = (time_t) -1;
+        tv->tv_usec = (suseconds_t) -1;
+        return tv;
+}
+
+/* Formats the timestamp t into buf (of size l) as "Www YYYY-MM-DD HH:MM:SS[.uuuuuu][ ZONE]".
+ *
+ * t is divided by USEC_PER_SEC and handed to gmtime_r() (utc=true) or localtime_r() (utc=false), i.e. it
+ * is treated as microseconds since the epoch. If us is true the microsecond remainder is appended.
+ *
+ * Returns buf on success. Returns NULL if the buffer is too small, if t is 0 or USEC_INFINITY, or if the
+ * time conversion fails. For t > USEC_TIMESTAMP_FORMATTABLE_MAX a fixed placeholder string is written
+ * and buf is returned. */
+static char *format_timestamp_internal(
+ char *buf,
+ size_t l,
+ usec_t t,
+ bool utc,
+ bool us) {
+
+ /* The weekdays in non-localized (English) form. We use this instead of the localized form, so that our
+ * generated timestamps may be parsed with parse_timestamp(), and always read the same. */
+ static const char * const weekdays[] = {
+ [0] = "Sun",
+ [1] = "Mon",
+ [2] = "Tue",
+ [3] = "Wed",
+ [4] = "Thu",
+ [5] = "Fri",
+ [6] = "Sat",
+ };
+
+ struct tm tm;
+ time_t sec;
+ size_t n;
+
+ assert(buf);
+
+ if (l <
+ 3 + /* week day */
+ 1 + 10 + /* space and date */
+ 1 + 8 + /* space and time */
+ (us ? 1 + 6 : 0) + /* "." and microsecond part */
+ 1 + 1 + /* space and shortest possible zone */
+ 1)
+ return NULL; /* Not enough space even for the shortest form. */
+ if (t <= 0 || t == USEC_INFINITY)
+ return NULL; /* Timestamp is unset */
+
+ /* Let's not format times with years > 9999 */
+ if (t > USEC_TIMESTAMP_FORMATTABLE_MAX) {
+ assert(l >= STRLEN("--- XXXX-XX-XX XX:XX:XX") + 1);
+ strcpy(buf, "--- XXXX-XX-XX XX:XX:XX");
+ return buf;
+ }
+
+ sec = (time_t) (t / USEC_PER_SEC); /* Round down */
+
+ if (!localtime_or_gmtime_r(&sec, &tm, utc))
+ return NULL;
+
+ /* Start with the week day */
+ assert((size_t) tm.tm_wday < ELEMENTSOF(weekdays));
+ /* Copies the three letters plus the NUL; the NUL is overwritten by strftime() right below. */
+ memcpy(buf, weekdays[tm.tm_wday], 4);
+
+ /* Add the main components */
+ if (strftime(buf + 3, l - 3, " %Y-%m-%d %H:%M:%S", &tm) <= 0)
+ return NULL; /* Doesn't fit */
+
+ /* Append the microseconds part, if that's requested */
+ if (us) {
+ n = strlen(buf);
+ /* 8 = "." + six digits + NUL */
+ if (n + 8 > l)
+ return NULL; /* Microseconds part doesn't fit. */
+
+ sprintf(buf + n, ".%06"PRI_USEC, t % USEC_PER_SEC);
+ }
+
+ /* Append the timezone */
+ n = strlen(buf);
+ if (utc) {
+ /* If this is UTC then let's explicitly use the "UTC" string here, because gmtime_r() normally uses the
+ * obsolete "GMT" instead. */
+ /* 5 = " UTC" + NUL */
+ if (n + 5 > l)
+ return NULL; /* "UTC" doesn't fit. */
+
+ strcpy(buf + n, " UTC");
+
+ } else if (!isempty(tm.tm_zone)) {
+ size_t tn;
+
+ /* An explicit timezone is specified, let's use it, if it fits */
+ tn = strlen(tm.tm_zone);
+ if (n + 1 + tn + 1 > l) {
+ /* The full time zone does not fit in. Yuck. */
+
+ if (n + 1 + _POSIX_TZNAME_MAX + 1 > l)
+ return NULL; /* Not even enough space for the POSIX minimum (of 6)? In that case, complain that it doesn't fit */
+
+ /* So the time zone doesn't fit in fully, but the caller passed enough space for the POSIX
+ * minimum time zone length. In this case suppress the timezone entirely, in order not to dump
+ * an overly long, hard to read string on the user. This should be safe, because the user will
+ * assume the local timezone anyway if none is shown. And so does parse_timestamp(). */
+ } else {
+ buf[n++] = ' ';
+ strcpy(buf + n, tm.tm_zone);
+ }
+ }
+
+ return buf;
+}
+
+/* format_timestamp_internal() with local time and without the microseconds part. */
+char *format_timestamp(char *buf, size_t l, usec_t t) {
+        const bool utc = false, us = false;
+
+        return format_timestamp_internal(buf, l, t, utc, us);
+}
+
+/* format_timestamp_internal() in UTC and without the microseconds part. */
+char *format_timestamp_utc(char *buf, size_t l, usec_t t) {
+        const bool utc = true, us = false;
+
+        return format_timestamp_internal(buf, l, t, utc, us);
+}
+
+/* format_timestamp_internal() with local time and with the microseconds part. */
+char *format_timestamp_us(char *buf, size_t l, usec_t t) {
+        const bool utc = false, us = true;
+
+        return format_timestamp_internal(buf, l, t, utc, us);
+}
+
+/* format_timestamp_internal() in UTC and with the microseconds part. */
+char *format_timestamp_us_utc(char *buf, size_t l, usec_t t) {
+        const bool utc = true, us = true;
+
+        return format_timestamp_internal(buf, l, t, utc, us);
+}
+
+/* Formats the distance between the realtime timestamp t and the current CLOCK_REALTIME reading into buf
+ * (of size l), suffixed with "ago" (t lies in the past) or "left" (t lies in the future, or is now). The
+ * granularity gets coarser the larger the distance is; a zero distance is rendered as "now".
+ *
+ * Returns buf, or NULL if t is 0 or USEC_INFINITY. The output is truncated if it does not fit, but always
+ * NUL terminated. */
+char *format_timestamp_relative(char *buf, size_t l, usec_t t) {
+        const char *s;
+        usec_t n, d;
+
+        if (t <= 0 || t == USEC_INFINITY)
+                return NULL;
+
+        /* The final "buf[l-1] = 0" below would write far out of bounds for l == 0, hence insist on a real
+         * buffer, like format_timespan() does. */
+        assert(buf);
+        assert(l > 0);
+
+        n = now(CLOCK_REALTIME);
+        if (n > t) {
+                d = n - t;
+                s = "ago";
+        } else {
+                d = t - n;
+                s = "left";
+        }
+
+        if (d >= USEC_PER_YEAR)
+                snprintf(buf, l, USEC_FMT " years " USEC_FMT " months %s",
+                         d / USEC_PER_YEAR,
+                         (d % USEC_PER_YEAR) / USEC_PER_MONTH, s);
+        else if (d >= USEC_PER_MONTH)
+                snprintf(buf, l, USEC_FMT " months " USEC_FMT " days %s",
+                         d / USEC_PER_MONTH,
+                         (d % USEC_PER_MONTH) / USEC_PER_DAY, s);
+        else if (d >= USEC_PER_WEEK)
+                snprintf(buf, l, USEC_FMT " weeks " USEC_FMT " days %s",
+                         d / USEC_PER_WEEK,
+                         (d % USEC_PER_WEEK) / USEC_PER_DAY, s);
+        else if (d >= 2*USEC_PER_DAY)
+                snprintf(buf, l, USEC_FMT " days %s", d / USEC_PER_DAY, s);
+        else if (d >= 25*USEC_PER_HOUR)
+                snprintf(buf, l, "1 day " USEC_FMT "h %s",
+                         (d - USEC_PER_DAY) / USEC_PER_HOUR, s);
+        else if (d >= 6*USEC_PER_HOUR)
+                snprintf(buf, l, USEC_FMT "h %s",
+                         d / USEC_PER_HOUR, s);
+        else if (d >= USEC_PER_HOUR)
+                snprintf(buf, l, USEC_FMT "h " USEC_FMT "min %s",
+                         d / USEC_PER_HOUR,
+                         (d % USEC_PER_HOUR) / USEC_PER_MINUTE, s);
+        else if (d >= 5*USEC_PER_MINUTE)
+                snprintf(buf, l, USEC_FMT "min %s",
+                         d / USEC_PER_MINUTE, s);
+        else if (d >= USEC_PER_MINUTE)
+                snprintf(buf, l, USEC_FMT "min " USEC_FMT "s %s",
+                         d / USEC_PER_MINUTE,
+                         (d % USEC_PER_MINUTE) / USEC_PER_SEC, s);
+        else if (d >= USEC_PER_SEC)
+                snprintf(buf, l, USEC_FMT "s %s",
+                         d / USEC_PER_SEC, s);
+        else if (d >= USEC_PER_MSEC)
+                snprintf(buf, l, USEC_FMT "ms %s",
+                         d / USEC_PER_MSEC, s);
+        else if (d > 0)
+                snprintf(buf, l, USEC_FMT"us %s",
+                         d, s);
+        else
+                snprintf(buf, l, "now");
+
+        buf[l-1] = 0;
+        return buf;
+}
+
+/* Formats the time span t (in microseconds) into buf (of size l) as a space separated list of
+ * "<number><unit>" components, largest unit first, e.g. built from the suffixes in the table below.
+ *
+ * Once at least one component has been written, formatting stops as soon as the remainder drops below
+ * accuracy. A value below one minute that does not divide evenly by its unit is written in dot notation
+ * (fractional digits limited by accuracy) and ends the output.
+ *
+ * USEC_INFINITY is written as "infinity", 0 as "0". Output that does not fit is truncated. Returns buf. */
+char *format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy) {
+ static const struct {
+ const char *suffix;
+ usec_t usec;
+ } table[] = {
+ { "y", USEC_PER_YEAR },
+ { "month", USEC_PER_MONTH },
+ { "w", USEC_PER_WEEK },
+ { "d", USEC_PER_DAY },
+ { "h", USEC_PER_HOUR },
+ { "min", USEC_PER_MINUTE },
+ { "s", USEC_PER_SEC },
+ { "ms", USEC_PER_MSEC },
+ { "us", 1 },
+ };
+
+ size_t i;
+ char *p = buf;
+ bool something = false;
+
+ assert(buf);
+ assert(l > 0);
+
+ if (t == USEC_INFINITY) {
+ strncpy(p, "infinity", l-1);
+ p[l-1] = 0;
+ return p;
+ }
+
+ if (t <= 0) {
+ strncpy(p, "0", l-1);
+ p[l-1] = 0;
+ return p;
+ }
+
+ /* The result of this function can be parsed with parse_sec */
+
+ for (i = 0; i < ELEMENTSOF(table); i++) {
+ int k = 0;
+ size_t n;
+ bool done = false;
+ usec_t a, b;
+
+ if (t <= 0)
+ break;
+
+ if (t < accuracy && something)
+ break;
+
+ if (t < table[i].usec)
+ continue;
+
+ /* Only room for the trailing NUL is left. */
+ if (l <= 1)
+ break;
+
+ /* a: whole units of this size, b: what is left over */
+ a = t / table[i].usec;
+ b = t % table[i].usec;
+
+ /* Let's see if we should show this in dot notation */
+ if (t < USEC_PER_MINUTE && b > 0) {
+ usec_t cc;
+ signed char j;
+
+ /* j: number of decimal digits of this unit, expressed in usec */
+ j = 0;
+ for (cc = table[i].usec; cc > 1; cc /= 10)
+ j++;
+
+ /* Drop one fractional digit for each decimal digit of the accuracy. */
+ for (cc = accuracy; cc > 1; cc /= 10) {
+ b /= 10;
+ j--;
+ }
+
+ if (j > 0) {
+ k = snprintf(p, l,
+ "%s"USEC_FMT".%0*"PRI_USEC"%s",
+ p > buf ? " " : "",
+ a,
+ j,
+ b,
+ table[i].suffix);
+
+ t = 0;
+ done = true;
+ }
+ }
+
+ /* No? Then let's show it normally */
+ if (!done) {
+ k = snprintf(p, l,
+ "%s"USEC_FMT"%s",
+ p > buf ? " " : "",
+ a,
+ table[i].suffix);
+
+ t = b;
+ }
+
+ /* snprintf() returns the untruncated length, hence clamp to the space we actually have. */
+ n = MIN((size_t) k, l);
+
+ l -= n;
+ p += n;
+
+ something = true;
+ }
+
+ *p = 0;
+
+ return buf;
+}
+
+/* Parses the timestamp string t and stores the result (microseconds) in *usec.
+ *
+ * With with_tz == false all syntaxes listed below are accepted, including the relative forms, a trailing
+ * " UTC" and a trailing local timezone name (one of the two tzname[] entries). With with_tz == true the
+ * whole relative/zone-suffix handling is skipped and only "today"/"yesterday"/"tomorrow" and the
+ * date/time formats are tried.
+ *
+ * Returns 0 on success. Returns -EINVAL if the string matches no syntax, if a given weekday does not match
+ * the date, or if the result falls outside of 0…USEC_TIMESTAMP_FORMATTABLE_MAX; errors from parse_sec()
+ * are passed through. */
+static int parse_timestamp_impl(const char *t, usec_t *usec, bool with_tz) {
+ static const struct {
+ const char *name;
+ const int nr;
+ } day_nr[] = {
+ { "Sunday", 0 },
+ { "Sun", 0 },
+ { "Monday", 1 },
+ { "Mon", 1 },
+ { "Tuesday", 2 },
+ { "Tue", 2 },
+ { "Wednesday", 3 },
+ { "Wed", 3 },
+ { "Thursday", 4 },
+ { "Thu", 4 },
+ { "Friday", 5 },
+ { "Fri", 5 },
+ { "Saturday", 6 },
+ { "Sat", 6 },
+ };
+
+ const char *k, *utc = NULL, *tzn = NULL;
+ struct tm tm, copy;
+ time_t x;
+ usec_t x_usec, plus = 0, minus = 0, ret;
+ int r, weekday = -1, dst = -1;
+ size_t i;
+
+ /* Allowed syntaxes:
+ *
+ * 2012-09-22 16:34:22
+ * 2012-09-22 16:34 (seconds will be set to 0)
+ * 2012-09-22 (time will be set to 00:00:00)
+ * 16:34:22 (date will be set to today)
+ * 16:34 (date will be set to today, seconds to 0)
+ * now
+ * yesterday (time is set to 00:00:00)
+ * today (time is set to 00:00:00)
+ * tomorrow (time is set to 00:00:00)
+ * +5min
+ * -5days
+ * @2147483647 (seconds since epoch)
+ */
+
+ assert(t);
+ assert(usec);
+
+ if (t[0] == '@' && !with_tz)
+ return parse_sec(t + 1, usec);
+
+ /* The relative forms below are applied on top of the current time. */
+ ret = now(CLOCK_REALTIME);
+
+ if (!with_tz) {
+ if (streq(t, "now"))
+ goto finish;
+
+ else if (t[0] == '+') {
+ r = parse_sec(t+1, &plus);
+ if (r < 0)
+ return r;
+
+ goto finish;
+
+ } else if (t[0] == '-') {
+ r = parse_sec(t+1, &minus);
+ if (r < 0)
+ return r;
+
+ goto finish;
+
+ } else if ((k = endswith(t, " ago"))) {
+ t = strndupa(t, k - t);
+
+ r = parse_sec(t, &minus);
+ if (r < 0)
+ return r;
+
+ goto finish;
+
+ } else if ((k = endswith(t, " left"))) {
+ t = strndupa(t, k - t);
+
+ r = parse_sec(t, &plus);
+ if (r < 0)
+ return r;
+
+ goto finish;
+ }
+
+ /* See if the timestamp is suffixed with UTC */
+ utc = endswith_no_case(t, " UTC");
+ if (utc)
+ t = strndupa(t, utc - t);
+ else {
+ const char *e = NULL;
+ int j;
+
+ tzset();
+
+ /* See if the timestamp is suffixed by either the DST or non-DST local timezone. Note that we only
+ * support the local timezones here, nothing else. Not because we wouldn't want to, but simply because
+ * there are no nice APIs available to cover this. By accepting the local time zone strings, we make
+ * sure that all timestamps written by format_timestamp() can be parsed correctly, even though we don't
+ * support arbitrary timezone specifications. */
+
+ for (j = 0; j <= 1; j++) {
+
+ if (isempty(tzname[j]))
+ continue;
+
+ e = endswith_no_case(t, tzname[j]);
+ if (!e)
+ continue;
+ if (e == t)
+ continue;
+ if (e[-1] != ' ')
+ continue;
+
+ break;
+ }
+
+ /* j is 2 here if the loop ran to its end without a match. */
+ if (IN_SET(j, 0, 1)) {
+ /* Found one of the two timezones specified. */
+ t = strndupa(t, e - t - 1);
+ dst = j;
+ tzn = tzname[j];
+ }
+ }
+ }
+
+ /* Seed tm with the current date, so that formats lacking a date default to today. */
+ x = (time_t) (ret / USEC_PER_SEC);
+ x_usec = 0;
+
+ if (!localtime_or_gmtime_r(&x, &tm, utc))
+ return -EINVAL;
+
+ tm.tm_isdst = dst;
+ if (!with_tz && tzn)
+ tm.tm_zone = tzn;
+
+ if (streq(t, "today")) {
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+
+ } else if (streq(t, "yesterday")) {
+ tm.tm_mday--;
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+
+ } else if (streq(t, "tomorrow")) {
+ tm.tm_mday++;
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+ }
+
+ /* Strip an optional leading weekday name; it is verified against the parsed date at from_tm. */
+ for (i = 0; i < ELEMENTSOF(day_nr); i++) {
+ size_t skip;
+
+ if (!startswith_no_case(t, day_nr[i].name))
+ continue;
+
+ skip = strlen(day_nr[i].name);
+ if (t[skip] != ' ')
+ continue;
+
+ weekday = day_nr[i].nr;
+ t += skip + 1;
+ break;
+ }
+
+ /* Try the formats from most to least specific, each time starting over from the pristine copy of tm.
+ * A format only counts as a match if it consumes the whole string (or, where a seconds field is
+ * present, is followed by a "." introducing the fractional part). */
+ copy = tm;
+ k = strptime(t, "%y-%m-%d %H:%M:%S", &tm);
+ if (k) {
+ if (*k == '.')
+ goto parse_usec;
+ else if (*k == 0)
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%Y-%m-%d %H:%M:%S", &tm);
+ if (k) {
+ if (*k == '.')
+ goto parse_usec;
+ else if (*k == 0)
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%y-%m-%d %H:%M", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = 0;
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%Y-%m-%d %H:%M", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = 0;
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%y-%m-%d", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%Y-%m-%d", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%H:%M:%S", &tm);
+ if (k) {
+ if (*k == '.')
+ goto parse_usec;
+ else if (*k == 0)
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%H:%M", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = 0;
+ goto from_tm;
+ }
+
+ return -EINVAL;
+
+parse_usec:
+ {
+ unsigned add;
+
+ /* k points to the "."; parse what follows as the fractional part. */
+ k++;
+ r = parse_fractional_part_u(&k, 6, &add);
+ if (r < 0)
+ return -EINVAL;
+
+ if (*k)
+ return -EINVAL;
+
+ x_usec = add;
+ }
+
+from_tm:
+ if (weekday >= 0 && tm.tm_wday != weekday)
+ return -EINVAL;
+
+ x = mktime_or_timegm(&tm, utc);
+ if (x < 0)
+ return -EINVAL;
+
+ ret = (usec_t) x * USEC_PER_SEC + x_usec;
+ if (ret > USEC_TIMESTAMP_FORMATTABLE_MAX)
+ return -EINVAL;
+
+finish:
+ /* Apply the relative offsets; at most one of plus/minus is non-zero. */
+ if (ret + plus < ret) /* overflow? */
+ return -EINVAL;
+ ret += plus;
+ if (ret > USEC_TIMESTAMP_FORMATTABLE_MAX)
+ return -EINVAL;
+
+ if (ret >= minus)
+ ret -= minus;
+ else
+ return -EINVAL;
+
+ *usec = ret;
+
+ return 0;
+}
+
+/* Result record parse_timestamp() places in a shared mapping, so that its forked child can hand the
+ * outcome of parse_timestamp_impl() back to the parent. */
+typedef struct ParseTimestampResult {
+ usec_t usec; /* the parsed timestamp */
+ int return_value; /* return value of parse_timestamp_impl(), or a negative errno */
+} ParseTimestampResult;
+
+/* Parses the timestamp string t into *usec, additionally supporting a trailing timezone name.
+ *
+ * If the last space separated word is not a timezone accepted by timezone_is_valid(), or if the string
+ * ends in " UTC", the string is parsed directly by parse_timestamp_impl(). Otherwise a child process is
+ * forked which sets $TZ to that timezone, parses the string, and reports the outcome through a shared
+ * anonymous mapping; this keeps the timezone change out of the calling process.
+ *
+ * Returns 0 on success (only then *usec is written), a negative errno-style value otherwise.
+ *
+ * NOTE(review): the parent reads the mapping without looking at how the child terminated. If the child
+ * dies before storing a result the mapping presumably still holds its initial zero bytes, which would
+ * read as "success, usec 0" — TODO confirm what safe_fork() with FORK_WAIT returns in that case. */
+int parse_timestamp(const char *t, usec_t *usec) {
+ char *last_space, *tz = NULL;
+ ParseTimestampResult *shared, tmp;
+ int r;
+
+ last_space = strrchr(t, ' ');
+ if (last_space != NULL && timezone_is_valid(last_space + 1, LOG_DEBUG))
+ tz = last_space + 1;
+
+ if (!tz || endswith_no_case(t, " UTC"))
+ return parse_timestamp_impl(t, usec, false);
+
+ shared = mmap(NULL, sizeof *shared, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+ if (shared == MAP_FAILED)
+ return negative_errno();
+
+ r = safe_fork("(sd-timestamp)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_WAIT, NULL);
+ if (r < 0) {
+ (void) munmap(shared, sizeof *shared);
+ return r;
+ }
+ if (r == 0) {
+ /* Child */
+ bool with_tz = true;
+
+ if (setenv("TZ", tz, 1) != 0) {
+ shared->return_value = negative_errno();
+ _exit(EXIT_FAILURE);
+ }
+
+ tzset();
+
+ /* If there is a timezone that matches the tzname fields, leave the parsing to the implementation.
+ * Otherwise just cut it off. */
+ with_tz = !STR_IN_SET(tz, tzname[0], tzname[1]);
+
+ /* Cut off the timezone if we don't need it. */
+ if (with_tz)
+ t = strndupa(t, last_space - t);
+
+ shared->return_value = parse_timestamp_impl(t, &shared->usec, with_tz);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Parent: copy the result out before the mapping goes away. */
+ tmp = *shared;
+ if (munmap(shared, sizeof *shared) != 0)
+ return negative_errno();
+
+ if (tmp.return_value == 0)
+ *usec = tmp.usec;
+
+ return tmp.return_value;
+}
+
+/* Checks whether p starts with one of the unit suffixes below. On a match the unit's length in
+ * microseconds is stored in *multiplier and a pointer to the text after the suffix is returned. Without a
+ * match *multiplier is left untouched and p itself is returned. */
+static const char* extract_multiplier(const char *p, usec_t *multiplier) {
+
+        /* The first entry that is a prefix of p wins, hence longer spellings must precede shorter ones
+         * they begin with (e.g. "min", "months", "msec" and "ms" all come before "m"). */
+        static const struct {
+                const char *suffix;
+                usec_t usec;
+        } table[] = {
+                { "seconds", USEC_PER_SEC    },
+                { "second",  USEC_PER_SEC    },
+                { "sec",     USEC_PER_SEC    },
+                { "s",       USEC_PER_SEC    },
+                { "minutes", USEC_PER_MINUTE },
+                { "minute",  USEC_PER_MINUTE },
+                { "min",     USEC_PER_MINUTE },
+                { "months",  USEC_PER_MONTH  },
+                { "month",   USEC_PER_MONTH  },
+                { "M",       USEC_PER_MONTH  },
+                { "msec",    USEC_PER_MSEC   },
+                { "ms",      USEC_PER_MSEC   },
+                { "m",       USEC_PER_MINUTE },
+                { "hours",   USEC_PER_HOUR   },
+                { "hour",    USEC_PER_HOUR   },
+                { "hr",      USEC_PER_HOUR   },
+                { "h",       USEC_PER_HOUR   },
+                { "days",    USEC_PER_DAY    },
+                { "day",     USEC_PER_DAY    },
+                { "d",       USEC_PER_DAY    },
+                { "weeks",   USEC_PER_WEEK   },
+                { "week",    USEC_PER_WEEK   },
+                { "w",       USEC_PER_WEEK   },
+                { "years",   USEC_PER_YEAR   },
+                { "year",    USEC_PER_YEAR   },
+                { "y",       USEC_PER_YEAR   },
+                { "usec",    1ULL            },
+                { "us",      1ULL            },
+                { "µs",      1ULL            },
+        };
+        size_t idx;
+
+        for (idx = 0; idx < ELEMENTSOF(table); idx++) {
+                const char *rest;
+
+                rest = startswith(p, table[idx].suffix);
+                if (!rest)
+                        continue;
+
+                *multiplier = table[idx].usec;
+                return rest;
+        }
+
+        return p;
+}
+
+/* Parses a time span such as "5min 30s" or "1.5h" from t and stores the sum of all components, in
+ * microseconds, in *usec. A number without a unit suffix is multiplied by default_unit. The literal
+ * "infinity" yields USEC_INFINITY.
+ *
+ * Returns 0 on success, -EINVAL on malformed input, -ERANGE on negative values or overflow, and -errno if
+ * strtoll() fails. */
+int parse_time(const char *t, usec_t *usec, usec_t default_unit) {
+ const char *p, *s;
+ usec_t r = 0;
+ bool something = false;
+
+ assert(t);
+ assert(usec);
+ assert(default_unit > 0);
+
+ p = t;
+
+ p += strspn(p, WHITESPACE);
+ s = startswith(p, "infinity");
+ if (s) {
+ s += strspn(s, WHITESPACE);
+ if (*s != 0)
+ return -EINVAL;
+
+ *usec = USEC_INFINITY;
+ return 0;
+ }
+
+ /* Each iteration consumes one "<number>[.<fraction>][<unit>]" component. */
+ for (;;) {
+ usec_t multiplier = default_unit, k;
+ long long l;
+ char *e;
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p == 0) {
+ if (!something)
+ return -EINVAL;
+
+ break;
+ }
+
+ if (*p == '-') /* Don't allow "-0" */
+ return -ERANGE;
+
+ errno = 0;
+ l = strtoll(p, &e, 10);
+ if (errno > 0)
+ return -errno;
+ if (l < 0)
+ return -ERANGE;
+
+ /* e stays at the end of the integer part; p moves on past the fractional digits, if any. */
+ if (*e == '.') {
+ p = e + 1;
+ p += strspn(p, DIGITS);
+ } else if (e == p)
+ return -EINVAL;
+ else
+ p = e;
+
+ s = extract_multiplier(p + strspn(p, WHITESPACE), &multiplier);
+ if (s == p && *s != '\0')
+ /* Don't allow '12.34.56', but accept '12.34 .56' or '12.34s.56'*/
+ return -EINVAL;
+
+ p = s;
+
+ if ((usec_t) l >= USEC_INFINITY / multiplier)
+ return -ERANGE;
+
+ k = (usec_t) l * multiplier;
+ if (k >= USEC_INFINITY - r)
+ return -ERANGE;
+
+ r += k;
+
+ something = true;
+
+ if (*e == '.') {
+ /* Add the fractional digits one by one, each worth a tenth of the previous one. */
+ usec_t m = multiplier / 10;
+ const char *b;
+
+ for (b = e + 1; *b >= '0' && *b <= '9'; b++, m /= 10) {
+ k = (usec_t) (*b - '0') * m;
+ if (k >= USEC_INFINITY - r)
+ return -ERANGE;
+
+ r += k;
+ }
+
+ /* Don't allow "0.-0", "3.+1", "3. 1", "3.sec" or "3.hoge"*/
+ if (b == e + 1)
+ return -EINVAL;
+ }
+ }
+
+ *usec = r;
+
+ return 0;
+}
+
+/* parse_time() with seconds as the unit for numbers that carry no suffix. */
+int parse_sec(const char *t, usec_t *usec) {
+        const usec_t default_unit = USEC_PER_SEC;
+
+        return parse_time(t, usec, default_unit);
+}
+
+/* Like parse_sec(), but a parsed value of 0 is stored as USEC_INFINITY. *ret is only written on success;
+ * the return value is that of parse_sec(). */
+int parse_sec_fix_0(const char *t, usec_t *ret) {
+        usec_t parsed;
+        int r;
+
+        assert(t);
+        assert(ret);
+
+        r = parse_sec(t, &parsed);
+        if (r < 0)
+                return r;
+
+        if (parsed == 0)
+                *ret = USEC_INFINITY;
+        else
+                *ret = parsed;
+
+        return r;
+}
+
+/* Checks whether p starts with one of the unit suffixes below, stores the unit's length in nanoseconds in
+ * *multiplier and returns a pointer to the text after the suffix. */
+static const char* extract_nsec_multiplier(const char *p, nsec_t *multiplier) {
+
+        /* The first entry that is a prefix of p wins, hence longer spellings must precede shorter ones
+         * they begin with. The empty suffix at the end is tried last and selects nanoseconds. */
+        static const struct {
+                const char *suffix;
+                nsec_t nsec;
+        } table[] = {
+                { "seconds", NSEC_PER_SEC    },
+                { "second",  NSEC_PER_SEC    },
+                { "sec",     NSEC_PER_SEC    },
+                { "s",       NSEC_PER_SEC    },
+                { "minutes", NSEC_PER_MINUTE },
+                { "minute",  NSEC_PER_MINUTE },
+                { "min",     NSEC_PER_MINUTE },
+                { "months",  NSEC_PER_MONTH  },
+                { "month",   NSEC_PER_MONTH  },
+                { "M",       NSEC_PER_MONTH  },
+                { "msec",    NSEC_PER_MSEC   },
+                { "ms",      NSEC_PER_MSEC   },
+                { "m",       NSEC_PER_MINUTE },
+                { "hours",   NSEC_PER_HOUR   },
+                { "hour",    NSEC_PER_HOUR   },
+                { "hr",      NSEC_PER_HOUR   },
+                { "h",       NSEC_PER_HOUR   },
+                { "days",    NSEC_PER_DAY    },
+                { "day",     NSEC_PER_DAY    },
+                { "d",       NSEC_PER_DAY    },
+                { "weeks",   NSEC_PER_WEEK   },
+                { "week",    NSEC_PER_WEEK   },
+                { "w",       NSEC_PER_WEEK   },
+                { "years",   NSEC_PER_YEAR   },
+                { "year",    NSEC_PER_YEAR   },
+                { "y",       NSEC_PER_YEAR   },
+                { "usec",    NSEC_PER_USEC   },
+                { "us",      NSEC_PER_USEC   },
+                { "µs",      NSEC_PER_USEC   },
+                { "nsec",    1ULL            },
+                { "ns",      1ULL            },
+                { "",        1ULL            }, /* default is nsec */
+        };
+        size_t idx;
+
+        for (idx = 0; idx < ELEMENTSOF(table); idx++) {
+                const char *rest;
+
+                rest = startswith(p, table[idx].suffix);
+                if (!rest)
+                        continue;
+
+                *multiplier = table[idx].nsec;
+                return rest;
+        }
+
+        return p;
+}
+
+/* Parses a time span such as "5min 30s" or "1.5ms" from t and stores the sum of all components, in
+ * nanoseconds, in *nsec. The literal "infinity" yields NSEC_INFINITY.
+ *
+ * Returns 0 on success, -EINVAL on malformed input, -ERANGE on negative values or overflow, and -errno if
+ * strtoll() fails. */
+int parse_nsec(const char *t, nsec_t *nsec) {
+ const char *p, *s;
+ nsec_t r = 0;
+ bool something = false;
+
+ assert(t);
+ assert(nsec);
+
+ p = t;
+
+ p += strspn(p, WHITESPACE);
+ s = startswith(p, "infinity");
+ if (s) {
+ s += strspn(s, WHITESPACE);
+ if (*s != 0)
+ return -EINVAL;
+
+ *nsec = NSEC_INFINITY;
+ return 0;
+ }
+
+ /* Each iteration consumes one "<number>[.<fraction>][<unit>]" component. */
+ for (;;) {
+ nsec_t multiplier = 1, k;
+ long long l;
+ char *e;
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p == 0) {
+ if (!something)
+ return -EINVAL;
+
+ break;
+ }
+
+ if (*p == '-') /* Don't allow "-0" */
+ return -ERANGE;
+
+ errno = 0;
+ l = strtoll(p, &e, 10);
+ if (errno > 0)
+ return -errno;
+ if (l < 0)
+ return -ERANGE;
+
+ /* e stays at the end of the integer part; p moves on past the fractional digits, if any. */
+ if (*e == '.') {
+ p = e + 1;
+ p += strspn(p, DIGITS);
+ } else if (e == p)
+ return -EINVAL;
+ else
+ p = e;
+
+ s = extract_nsec_multiplier(p + strspn(p, WHITESPACE), &multiplier);
+ if (s == p && *s != '\0')
+ /* Don't allow '12.34.56', but accept '12.34 .56' or '12.34s.56'*/
+ return -EINVAL;
+
+ p = s;
+
+ if ((nsec_t) l >= NSEC_INFINITY / multiplier)
+ return -ERANGE;
+
+ k = (nsec_t) l * multiplier;
+ if (k >= NSEC_INFINITY - r)
+ return -ERANGE;
+
+ r += k;
+
+ something = true;
+
+ if (*e == '.') {
+ /* Add the fractional digits one by one, each worth a tenth of the previous one. */
+ nsec_t m = multiplier / 10;
+ const char *b;
+
+ for (b = e + 1; *b >= '0' && *b <= '9'; b++, m /= 10) {
+ k = (nsec_t) (*b - '0') * m;
+ if (k >= NSEC_INFINITY - r)
+ return -ERANGE;
+
+ r += k;
+ }
+
+ /* Don't allow "0.-0", "3.+1", "3. 1", "3.sec" or "3.hoge"*/
+ if (b == e + 1)
+ return -EINVAL;
+ }
+ }
+
+ *nsec = r;
+
+ return 0;
+}
+
+/* Returns true if adjtimex() succeeds and the STA_UNSYNC bit is not set in the reported status. */
+bool ntp_synced(void) {
+        struct timex txc = {};
+
+        if (adjtimex(&txc) < 0)
+                return false;
+
+        return !(txc.status & STA_UNSYNC);
+}
+
+/* Builds a string list consisting of "UTC" plus the third column of every entry of
+ * /usr/share/zoneinfo/zone.tab, sorted with strv_sort(), and stores it in *ret. If the file does not
+ * exist the list contains only "UTC".
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -errno if the file cannot be opened for a reason
+ * other than ENOENT, or the error returned by read_line(). */
+int get_timezones(char ***ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **zones = NULL;
+ size_t n_zones = 0, n_allocated = 0;
+ int r;
+
+ assert(ret);
+
+ zones = strv_new("UTC");
+ if (!zones)
+ return -ENOMEM;
+
+ /* One entry plus the terminating NULL */
+ n_allocated = 2;
+ n_zones = 1;
+
+ f = fopen("/usr/share/zoneinfo/zone.tab", "re");
+ if (f) {
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *p, *w;
+ size_t k;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ p = strstrip(line);
+
+ if (isempty(p) || *p == '#')
+ continue;
+
+ /* Skip over country code */
+ p += strcspn(p, WHITESPACE);
+ p += strspn(p, WHITESPACE);
+
+ /* Skip over coordinates */
+ p += strcspn(p, WHITESPACE);
+ p += strspn(p, WHITESPACE);
+
+ /* Found timezone name */
+ k = strcspn(p, WHITESPACE);
+ if (k <= 0)
+ continue;
+
+ w = strndup(p, k);
+ if (!w)
+ return -ENOMEM;
+
+ /* Make room for the new entry and the terminating NULL. */
+ if (!GREEDY_REALLOC(zones, n_allocated, n_zones + 2)) {
+ free(w);
+ return -ENOMEM;
+ }
+
+ zones[n_zones++] = w;
+ zones[n_zones] = NULL;
+ }
+
+ strv_sort(zones);
+
+ } else if (errno != ENOENT)
+ return -errno;
+
+ *ret = TAKE_PTR(zones);
+
+ return 0;
+}
+
+/* Checks whether name designates a usable timezone: it must be a non-empty relative name built only from
+ * [0-9a-zA-Z], '-', '_', '+' and single '/' separators (no leading, trailing or doubled slash), shorter
+ * than PATH_MAX, and /usr/share/zoneinfo/<name> must be a regular file starting with the "TZif" magic.
+ *
+ * Failures of the file checks are logged at log_level. Returns true if all checks pass. */
+bool timezone_is_valid(const char *name, int log_level) {
+ bool slash = false;
+ const char *p, *t;
+ _cleanup_close_ int fd = -1;
+ char buf[4];
+ int r;
+
+ if (isempty(name))
+ return false;
+
+ if (name[0] == '/')
+ return false;
+
+ for (p = name; *p; p++) {
+ if (!(*p >= '0' && *p <= '9') &&
+ !(*p >= 'a' && *p <= 'z') &&
+ !(*p >= 'A' && *p <= 'Z') &&
+ !IN_SET(*p, '-', '_', '+', '/'))
+ return false;
+
+ /* "slash" tracks whether the previous character was a '/', to reject "//". */
+ if (*p == '/') {
+
+ if (slash)
+ return false;
+
+ slash = true;
+ } else
+ slash = false;
+ }
+
+ /* Trailing slash? */
+ if (slash)
+ return false;
+
+ if (p - name >= PATH_MAX)
+ return false;
+
+ t = strjoina("/usr/share/zoneinfo/", name);
+
+ fd = open(t, O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ log_full_errno(log_level, errno, "Failed to open timezone file '%s': %m", t);
+ return false;
+ }
+
+ r = fd_verify_regular(fd);
+ if (r < 0) {
+ log_full_errno(log_level, r, "Timezone file '%s' is not a regular file: %m", t);
+ return false;
+ }
+
+ r = loop_read_exact(fd, buf, 4, false);
+ if (r < 0) {
+ log_full_errno(log_level, r, "Failed to read from timezone file '%s': %m", t);
+ return false;
+ }
+
+ /* Magic from tzfile(5) */
+ if (memcmp(buf, "TZif", 4) != 0) {
+ log_full(log_level, "Timezone file '%s' has wrong magic bytes", t);
+ return false;
+ }
+
+ return true;
+}
+
+/* Returns whether a timerfd can be created on CLOCK_BOOTTIME. The probe runs only on the first call; the
+ * outcome is kept in a static variable and reused afterwards. */
+bool clock_boottime_supported(void) {
+        static int supported = -1;
+        int fd;
+
+        /* Note that this checks whether CLOCK_BOOTTIME is available in general as well as available for timerfds()! */
+
+        if (supported >= 0)
+                return supported;
+
+        fd = timerfd_create(CLOCK_BOOTTIME, TFD_NONBLOCK|TFD_CLOEXEC);
+        if (fd >= 0) {
+                safe_close(fd);
+                supported = true;
+        } else
+                supported = false;
+
+        return supported;
+}
+
+/* Returns CLOCK_BOOTTIME if clock_boottime_supported(), CLOCK_MONOTONIC otherwise. */
+clockid_t clock_boottime_or_monotonic(void) {
+        return clock_boottime_supported() ? CLOCK_BOOTTIME : CLOCK_MONOTONIC;
+}
+
+/* Returns whether the given clock is usable: CLOCK_MONOTONIC and CLOCK_REALTIME always are,
+ * CLOCK_BOOTTIME is judged by clock_boottime_supported(), and everything else is probed with
+ * clock_gettime(). */
+bool clock_supported(clockid_t clock) {
+        struct timespec ts;
+
+        if (clock == CLOCK_MONOTONIC || clock == CLOCK_REALTIME)
+                return true;
+
+        if (clock == CLOCK_BOOTTIME)
+                return clock_boottime_supported();
+
+        /* The ALARM flavour cannot work without the base clock; if the base clock is there, probe it like
+         * any other clock. */
+        if (clock == CLOCK_BOOTTIME_ALARM && !clock_boottime_supported())
+                return false;
+
+        /* For everything else, check properly */
+        return clock_gettime(clock, &ts) >= 0;
+}
+
+/* Derives the timezone name from the target of the /etc/localtime symlink: the target must point below
+ * /usr/share/zoneinfo/ (absolute or via "../") and the remainder must pass timezone_is_valid(). On
+ * success a newly allocated copy of the name is stored in *tz and 0 is returned. Returns the
+ * readlink_malloc() error, -EINVAL for an unsuitable target, or -ENOMEM. */
+int get_timezone(char **tz) {
+        _cleanup_free_ char *target = NULL;
+        const char *zone;
+        char *copy;
+        int r;
+
+        r = readlink_malloc("/etc/localtime", &target);
+        if (r < 0)
+                return r; /* returns EINVAL if not a symlink */
+
+        zone = PATH_STARTSWITH_SET(target, "/usr/share/zoneinfo/", "../usr/share/zoneinfo/");
+        if (!zone || !timezone_is_valid(zone, LOG_DEBUG))
+                return -EINVAL;
+
+        copy = strdup(zone);
+        if (!copy)
+                return -ENOMEM;
+
+        *tz = copy;
+        return 0;
+}
+
+/* Converts *tm to a time_t, with timegm() if utc is true and with mktime() otherwise. */
+time_t mktime_or_timegm(struct tm *tm, bool utc) {
+        if (utc)
+                return timegm(tm);
+
+        return mktime(tm);
+}
+
+/* Breaks *t down into *tm, with gmtime_r() if utc is true and with localtime_r() otherwise, and passes on
+ * that function's return value. */
+struct tm *localtime_or_gmtime_r(const time_t *t, struct tm *tm, bool utc) {
+        if (utc)
+                return gmtime_r(t, tm);
+
+        return localtime_r(t, tm);
+}
+
+/* Converts u (microseconds) into clock ticks of sysconf(_SC_CLK_TCK) per second, rounding up. The tick
+ * rate is looked up once per thread and kept in a thread-local variable. */
+unsigned long usec_to_jiffies(usec_t u) {
+        static thread_local unsigned long ticks_per_sec = 0;
+
+        if (ticks_per_sec == 0) {
+                long v;
+
+                v = sysconf(_SC_CLK_TCK);
+
+                assert(v > 0);
+                ticks_per_sec = v;
+        }
+
+        return DIV_ROUND_UP(u, USEC_PER_SEC / ticks_per_sec);
+}
+
+/* Translates the timestamp x from clock "from" to clock "to", by applying its distance from "now" on the
+ * source clock to "now" on the destination clock. USEC_INFINITY is passed through, and so is x if both
+ * clocks map to the same ID. */
+usec_t usec_shift_clock(usec_t x, clockid_t from, clockid_t to) {
+        usec_t src_now, dst_now;
+
+        if (x == USEC_INFINITY)
+                return USEC_INFINITY;
+        if (map_clock_id(from) == map_clock_id(to))
+                return x;
+
+        src_now = now(from);
+        dst_now = now(to);
+
+        if (x <= src_now)
+                /* x lies in the past */
+                return usec_sub_unsigned(dst_now, usec_sub_unsigned(src_now, x));
+
+        /* x lies in the future */
+        return usec_add(dst_now, usec_sub_unsigned(x, src_now));
+}
+
+/* Calls tzset() and returns true if both the "timezone" and the "daylight" globals are 0 afterwards. */
+bool in_utc_timezone(void) {
+        tzset();
+
+        if (timezone != 0)
+                return false;
+
+        return daylight == 0;
+}
+
+/* Creates a CLOCK_REALTIME timerfd armed with TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET and returns it.
+ * Returns -errno if creating or arming the timer fails. */
+int time_change_fd(void) {
+
+        /* We only care for the cancellation event, hence we set the timeout to the latest possible value. */
+        static const struct itimerspec far_future = {
+                .it_value.tv_sec = TIME_T_MAX,
+        };
+
+        _cleanup_close_ int timer_fd;
+        int r;
+
+        assert_cc(sizeof(time_t) == sizeof(TIME_T_MAX));
+
+        /* Uses TFD_TIMER_CANCEL_ON_SET to get notifications whenever CLOCK_REALTIME makes a jump relative to
+         * CLOCK_MONOTONIC. */
+
+        timer_fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK|TFD_CLOEXEC);
+        if (timer_fd < 0)
+                return -errno;
+
+        r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET, &far_future, NULL);
+        if (r < 0)
+                return -errno;
+
+        return TAKE_FD(timer_fd);
+}
diff --git a/src/basic/time-util.h b/src/basic/time-util.h
new file mode 100644
index 0000000..5316305
--- /dev/null
+++ b/src/basic/time-util.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+
+typedef uint64_t usec_t;
+typedef uint64_t nsec_t;
+
+#define PRI_NSEC PRIu64
+#define PRI_USEC PRIu64
+#define NSEC_FMT "%" PRI_NSEC
+#define USEC_FMT "%" PRI_USEC
+
+#include "macro.h"
+
+typedef struct dual_timestamp {
+ usec_t realtime;
+ usec_t monotonic;
+} dual_timestamp;
+
+typedef struct triple_timestamp {
+ usec_t realtime;
+ usec_t monotonic;
+ usec_t boottime;
+} triple_timestamp;
+
+#define USEC_INFINITY ((usec_t) -1)
+#define NSEC_INFINITY ((nsec_t) -1)
+
+#define MSEC_PER_SEC 1000ULL
+#define USEC_PER_SEC ((usec_t) 1000000ULL)
+#define USEC_PER_MSEC ((usec_t) 1000ULL)
+#define NSEC_PER_SEC ((nsec_t) 1000000000ULL)
+#define NSEC_PER_MSEC ((nsec_t) 1000000ULL)
+#define NSEC_PER_USEC ((nsec_t) 1000ULL)
+
+#define USEC_PER_MINUTE ((usec_t) (60ULL*USEC_PER_SEC))
+#define NSEC_PER_MINUTE ((nsec_t) (60ULL*NSEC_PER_SEC))
+#define USEC_PER_HOUR ((usec_t) (60ULL*USEC_PER_MINUTE))
+#define NSEC_PER_HOUR ((nsec_t) (60ULL*NSEC_PER_MINUTE))
+#define USEC_PER_DAY ((usec_t) (24ULL*USEC_PER_HOUR))
+#define NSEC_PER_DAY ((nsec_t) (24ULL*NSEC_PER_HOUR))
+#define USEC_PER_WEEK ((usec_t) (7ULL*USEC_PER_DAY))
+#define NSEC_PER_WEEK ((nsec_t) (7ULL*NSEC_PER_DAY))
+#define USEC_PER_MONTH ((usec_t) (2629800ULL*USEC_PER_SEC))
+#define NSEC_PER_MONTH ((nsec_t) (2629800ULL*NSEC_PER_SEC))
+#define USEC_PER_YEAR ((usec_t) (31557600ULL*USEC_PER_SEC))
+#define NSEC_PER_YEAR ((nsec_t) (31557600ULL*NSEC_PER_SEC))
+
+/* We assume a maximum timezone length of 6. TZNAME_MAX is not defined on Linux, but glibc internally initializes this
+ * to 6. Let's rely on that. */
+#define FORMAT_TIMESTAMP_MAX (3+1+10+1+8+1+6+1+6+1)
+#define FORMAT_TIMESTAMP_WIDTH 28 /* when outputting, assume this width */
+#define FORMAT_TIMESTAMP_RELATIVE_MAX 256
+#define FORMAT_TIMESPAN_MAX 64
+
+/* Largest value of a signed time_t: all bits set except the sign bit. The expansion is fully parenthesized so it
+ * behaves as a single operand in any context, including 'sizeof TIME_T_MAX'. */
+#define TIME_T_MAX ((time_t)((UINTMAX_C(1) << ((sizeof(time_t) << 3) - 1)) - 1))
+
+#define DUAL_TIMESTAMP_NULL ((struct dual_timestamp) {})
+#define TRIPLE_TIMESTAMP_NULL ((struct triple_timestamp) {})
+
+usec_t now(clockid_t clock);
+nsec_t now_nsec(clockid_t clock);
+
+dual_timestamp* dual_timestamp_get(dual_timestamp *ts);
+dual_timestamp* dual_timestamp_from_realtime(dual_timestamp *ts, usec_t u);
+dual_timestamp* dual_timestamp_from_monotonic(dual_timestamp *ts, usec_t u);
+dual_timestamp* dual_timestamp_from_boottime_or_monotonic(dual_timestamp *ts, usec_t u);
+
+triple_timestamp* triple_timestamp_get(triple_timestamp *ts);
+triple_timestamp* triple_timestamp_from_realtime(triple_timestamp *ts, usec_t u);
+
+#define DUAL_TIMESTAMP_HAS_CLOCK(clock) \
+ IN_SET(clock, CLOCK_REALTIME, CLOCK_REALTIME_ALARM, CLOCK_MONOTONIC)
+
+#define TRIPLE_TIMESTAMP_HAS_CLOCK(clock) \
+ IN_SET(clock, CLOCK_REALTIME, CLOCK_REALTIME_ALARM, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM)
+
+/* True if at least one of the two clock values is a real timestamp, i.e. neither 0 nor USEC_INFINITY. */
+static inline bool dual_timestamp_is_set(const dual_timestamp *ts) {
+ if (ts->realtime > 0 && ts->realtime != USEC_INFINITY)
+ return true;
+
+ return ts->monotonic > 0 && ts->monotonic != USEC_INFINITY;
+}
+
+/* True if at least one of the three clock values is a real timestamp, i.e. neither 0 nor USEC_INFINITY. */
+static inline bool triple_timestamp_is_set(const triple_timestamp *ts) {
+ if (ts->realtime > 0 && ts->realtime != USEC_INFINITY)
+ return true;
+
+ if (ts->monotonic > 0 && ts->monotonic != USEC_INFINITY)
+ return true;
+
+ return ts->boottime > 0 && ts->boottime != USEC_INFINITY;
+}
+
+usec_t triple_timestamp_by_clock(triple_timestamp *ts, clockid_t clock);
+
+usec_t timespec_load(const struct timespec *ts) _pure_;
+nsec_t timespec_load_nsec(const struct timespec *ts) _pure_;
+struct timespec *timespec_store(struct timespec *ts, usec_t u);
+
+usec_t timeval_load(const struct timeval *tv) _pure_;
+struct timeval *timeval_store(struct timeval *tv, usec_t u);
+
+char *format_timestamp(char *buf, size_t l, usec_t t);
+char *format_timestamp_utc(char *buf, size_t l, usec_t t);
+char *format_timestamp_us(char *buf, size_t l, usec_t t);
+char *format_timestamp_us_utc(char *buf, size_t l, usec_t t);
+char *format_timestamp_relative(char *buf, size_t l, usec_t t);
+char *format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy);
+
+int parse_timestamp(const char *t, usec_t *usec);
+
+int parse_sec(const char *t, usec_t *usec);
+int parse_sec_fix_0(const char *t, usec_t *usec);
+int parse_time(const char *t, usec_t *usec, usec_t default_unit);
+int parse_nsec(const char *t, nsec_t *nsec);
+
+bool ntp_synced(void);
+
+int get_timezones(char ***l);
+bool timezone_is_valid(const char *name, int log_level);
+
+bool clock_boottime_supported(void);
+bool clock_supported(clockid_t clock);
+clockid_t clock_boottime_or_monotonic(void);
+
+usec_t usec_shift_clock(usec_t, clockid_t from, clockid_t to);
+
+int get_timezone(char **timezone);
+
+time_t mktime_or_timegm(struct tm *tm, bool utc);
+struct tm *localtime_or_gmtime_r(const time_t *t, struct tm *tm, bool utc);
+
+unsigned long usec_to_jiffies(usec_t usec);
+
+bool in_utc_timezone(void);
+
+/* Saturating addition of two time values: a sum that wraps around is reported as USEC_INFINITY, so an
+ * USEC_INFINITY operand never turns into a small finite result. */
+static inline usec_t usec_add(usec_t a, usec_t b) {
+ usec_t sum = a + b;
+
+ /* After unsigned wrap-around the sum is smaller than an operand. */
+ return (sum < a || sum < b) ? USEC_INFINITY : sum;
+}
+
+/* Subtracts 'delta' from 'timestamp', clamping at 0 instead of wrapping around. USEC_INFINITY stays
+ * USEC_INFINITY whatever 'delta' is. */
+static inline usec_t usec_sub_unsigned(usec_t timestamp, usec_t delta) {
+
+ if (timestamp == USEC_INFINITY) /* Make sure infinity doesn't degrade */
+ return USEC_INFINITY;
+
+ return timestamp < delta ? 0 : timestamp - delta;
+}
+
+/* Subtracts a signed 'delta' from 'timestamp'. A negative delta becomes a saturating addition (usec_add()), a
+ * non-negative one a clamped subtraction (usec_sub_unsigned()). */
+static inline usec_t usec_sub_signed(usec_t timestamp, int64_t delta) {
+ if (delta < 0)
+ /* Negate in the unsigned domain: '-delta' overflows (undefined behaviour) for delta == INT64_MIN, while
+ * 0 - (usec_t) delta is well-defined and yields the magnitude of every negative value. */
+ return usec_add(timestamp, (usec_t) 0 - (usec_t) delta);
+
+ return usec_sub_unsigned(timestamp, (usec_t) delta);
+}
+
+#if SIZEOF_TIME_T == 8
+/* The last second we can format is 31. Dec 9999, 1s before midnight, because otherwise we'd enter 5 digit year
+ * territory. However, since we want to stay away from this in all timezones we take one day off. */
+#define USEC_TIMESTAMP_FORMATTABLE_MAX ((usec_t) 253402214399000000)
+#elif SIZEOF_TIME_T == 4
+/* With a 32bit time_t we can't go beyond 2038... */
+#define USEC_TIMESTAMP_FORMATTABLE_MAX ((usec_t) 2147483647000000)
+#else
+#error "Yuck, time_t is neither 4 nor 8 bytes wide?"
+#endif
+
+int time_change_fd(void);
diff --git a/src/basic/tmpfile-util.c b/src/basic/tmpfile-util.c
new file mode 100644
index 0000000..bc92d6a
--- /dev/null
+++ b/src/basic/tmpfile-util.c
@@ -0,0 +1,330 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/mman.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "memfd-util.h"
+#include "missing_fcntl.h"
+#include "missing_syscall.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+
+/* Creates a temporary file next to 'path' (name derived by tempfn_xxxxxx()) and opens it for writing.
+ * On success returns 0, stores the stream in *_f and the newly allocated file name in *_temp_path; the caller
+ * owns both. On failure returns a negative errno value and leaves both outputs untouched. */
+int fopen_temporary(const char *path, FILE **_f, char **_temp_path) {
+ FILE *f;
+ char *t;
+ int r, fd;
+
+ assert(path);
+ assert(_f);
+ assert(_temp_path);
+
+ r = tempfn_xxxxxx(path, NULL, &t);
+ if (r < 0)
+ return r;
+
+ fd = mkostemp_safe(t);
+ if (fd < 0) {
+ free(t);
+ /* mkostemp_safe() already returns a negative errno value; errno itself may be stale by now. */
+ return fd;
+ }
+
+ f = fdopen(fd, "w");
+ if (!f) {
+ /* Record the error before the cleanup calls get a chance to overwrite errno. */
+ r = -errno;
+ unlink_noerrno(t);
+ free(t);
+ safe_close(fd);
+ return r;
+ }
+
+ *_f = f;
+ *_temp_path = t;
+
+ return 0;
+}
+
+/* Like mkostemp(pattern, O_CLOEXEC), but runs with the umask set to 077; the previous umask is put back when the
+ * function returns. Returns the new file descriptor, or a negative errno value. */
+int mkostemp_safe(char *pattern) {
+ _cleanup_umask_ mode_t saved_umask = 0;
+ int fd;
+
+ assert(pattern);
+
+ saved_umask = umask(077);
+
+ fd = mkostemp(pattern, O_CLOEXEC);
+ return fd < 0 ? -errno : fd;
+}
+
+/* Creates a temporary file from 'pattern' via mkostemp_safe() and wraps it in a stdio stream opened with 'mode'.
+ * Returns 0 and stores the stream in *ret_f, or returns a negative errno value. */
+int fmkostemp_safe(char *pattern, const char *mode, FILE **ret_f) {
+ FILE *stream;
+ int fd;
+
+ fd = mkostemp_safe(pattern);
+ if (fd < 0)
+ return fd;
+
+ stream = fdopen(fd, mode);
+ if (stream) {
+ *ret_f = stream;
+ return 0;
+ }
+
+ safe_close(fd);
+ return -errno;
+}
+
+/* Derives a mkostemp()-style template from path 'p': the last path component gets ".#" plus the optional 'extra'
+ * string in front and "XXXXXX" appended. On success returns 0 and stores a newly allocated string in *ret.
+ * Returns -EINVAL for an empty path, for "/" or for an invalid last component, and -ENOMEM if allocation fails. */
+int tempfn_xxxxxx(const char *p, const char *extra, char **ret) {
+ const char *fn;
+ char *t;
+
+ assert(ret);
+
+ if (isempty(p))
+ return -EINVAL;
+ if (path_equal(p, "/"))
+ return -EINVAL;
+
+ /*
+ * Turns this:
+ * /foo/bar/waldo
+ *
+ * Into this:
+ * /foo/bar/.#<extra>waldoXXXXXX
+ */
+
+ fn = basename(p);
+ if (!filename_is_valid(fn))
+ return -EINVAL;
+
+ extra = strempty(extra);
+
+ /* Whole input path, ".#" (2), extra, "XXXXXX" (6), trailing NUL (1) */
+ t = new(char, strlen(p) + 2 + strlen(extra) + 6 + 1);
+ if (!t)
+ return -ENOMEM;
+
+ /* fn - p is the length of the directory prefix; this relies on basename() returning a pointer into p. */
+ strcpy(stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), extra), fn), "XXXXXX");
+
+ *ret = path_simplify(t, false);
+ return 0;
+}
+
+/* Same naming scheme as a "XXXXXX" template, but the suffix is filled in right away with 16 hex digits taken from
+ * one random_u64() value. On success returns 0 and stores a newly allocated string in *ret. Returns -EINVAL for an
+ * empty path, for "/" or for an invalid last component, and -ENOMEM if allocation fails. No file is created. */
+int tempfn_random(const char *p, const char *extra, char **ret) {
+ const char *fn;
+ char *t, *x;
+ uint64_t u;
+ unsigned i;
+
+ assert(ret);
+
+ if (isempty(p))
+ return -EINVAL;
+ if (path_equal(p, "/"))
+ return -EINVAL;
+
+ /*
+ * Turns this:
+ * /foo/bar/waldo
+ *
+ * Into this:
+ * /foo/bar/.#<extra>waldobaa2a261115984a9
+ */
+
+ fn = basename(p);
+ if (!filename_is_valid(fn))
+ return -EINVAL;
+
+ extra = strempty(extra);
+
+ /* Whole input path, ".#" (2), extra, 16 hex digits, trailing NUL (1) */
+ t = new(char, strlen(p) + 2 + strlen(extra) + 16 + 1);
+ if (!t)
+ return -ENOMEM;
+
+ /* x ends up pointing at the terminating NUL written by the last stpcpy(), i.e. where the suffix goes. */
+ x = stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), extra), fn);
+
+ /* Emit the 64 random bits as 16 nibbles, least significant nibble first. */
+ u = random_u64();
+ for (i = 0; i < 16; i++) {
+ *(x++) = hexchar(u & 0xF);
+ u >>= 4;
+ }
+
+ *x = 0;
+
+ *ret = path_simplify(t, false);
+ return 0;
+}
+
+/* Builds a random file name located inside directory 'p': ".#", the optional 'extra' string and 16 hex digits
+ * taken from one random_u64() value. If 'p' is NULL the directory reported by tmp_dir() is used; if 'p' is the
+ * empty string the name is generated without a directory prefix. On success returns 0 and stores a newly allocated
+ * string in *ret; returns a negative errno value otherwise. No file is created. */
+int tempfn_random_child(const char *p, const char *extra, char **ret) {
+ char *t, *x;
+ uint64_t u;
+ unsigned i;
+ int r;
+
+ assert(ret);
+
+ /* Turns this:
+ * /foo/bar/waldo
+ * Into this:
+ * /foo/bar/waldo/.#<extra>3c2b6219aa75d7d0
+ */
+
+ if (!p) {
+ r = tmp_dir(&p);
+ if (r < 0)
+ return r;
+ }
+
+ extra = strempty(extra);
+
+ /* Directory, "/.#" (3), extra, 16 hex digits, trailing NUL (1) */
+ t = new(char, strlen(p) + 3 + strlen(extra) + 16 + 1);
+ if (!t)
+ return -ENOMEM;
+
+ if (isempty(p))
+ x = stpcpy(stpcpy(t, ".#"), extra);
+ else
+ x = stpcpy(stpcpy(stpcpy(t, p), "/.#"), extra);
+
+ /* Emit the 64 random bits as 16 nibbles, least significant nibble first. */
+ u = random_u64();
+ for (i = 0; i < 16; i++) {
+ *(x++) = hexchar(u & 0xF);
+ u >>= 4;
+ }
+
+ *x = 0;
+
+ *ret = path_simplify(t, false);
+ return 0;
+}
+
+/* Opens an anonymous temporary file in 'directory' (or in the directory reported by tmp_dir() if NULL) and
+ * returns its file descriptor, or a negative errno value. An empty 'directory' string is rejected with -EINVAL. */
+int open_tmpfile_unlinkable(const char *directory, int flags) {
+ char *p;
+ int fd, r;
+
+ if (!directory) {
+ r = tmp_dir(&directory);
+ if (r < 0)
+ return r;
+ } else if (isempty(directory))
+ return -EINVAL;
+
+ /* Returns an unlinked temporary file that cannot be linked into the file system anymore */
+
+ /* Try O_TMPFILE first, if it is supported */
+ fd = open(directory, flags|O_TMPFILE|O_EXCL, S_IRUSR|S_IWUSR);
+ if (fd >= 0)
+ return fd;
+
+ /* Fall back to unguessable name + unlinking */
+ /* NOTE(review): p is never freed here, so strjoina() presumably allocates on the stack - confirm. */
+ p = strjoina(directory, "/systemd-tmp-XXXXXX");
+
+ fd = mkostemp_safe(p);
+ if (fd < 0)
+ return fd;
+
+ /* Best effort: a failing unlink() is ignored and the open descriptor is returned regardless. */
+ (void) unlink(p);
+
+ return fd;
+}
+
+/* Opens a temporary file that is meant to be moved to 'target' later on. Returns the file descriptor, or a
+ * negative errno value. *ret_path is set to NULL if O_TMPFILE was used, otherwise to the newly allocated name of
+ * the temporary file (owned by the caller). */
+int open_tmpfile_linkable(const char *target, int flags, char **ret_path) {
+ _cleanup_free_ char *tmp = NULL;
+ int r, fd;
+
+ assert(target);
+ assert(ret_path);
+
+ /* Don't allow O_EXCL, as that has a special meaning for O_TMPFILE */
+ assert((flags & O_EXCL) == 0);
+
+ /* Creates a temporary file that shall be renamed to "target" later. If possible, this uses O_TMPFILE – in
+ * which case "ret_path" will be returned as NULL. If that is not possible, the temporary path name used is
+ * returned in "ret_path". Use link_tmpfile() below to rename the result after writing the file in full. */
+
+ fd = open_parent(target, O_TMPFILE|flags, 0640);
+ if (fd >= 0) {
+ *ret_path = NULL;
+ return fd;
+ }
+
+ /* O_TMPFILE did not work; log why and fall back to a randomly named regular file next to the target. */
+ log_debug_errno(fd, "Failed to use O_TMPFILE for %s: %m", target);
+
+ r = tempfn_random(target, NULL, &tmp);
+ if (r < 0)
+ return r;
+
+ fd = open(tmp, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|flags, 0640);
+ if (fd < 0)
+ return -errno;
+
+ *ret_path = TAKE_PTR(tmp);
+
+ return fd;
+}
+
+/* Gives the temporary file behind 'fd' its final name 'target'. 'path' is the temporary file name, or NULL if the
+ * file has no name. Returns 0 on success, a negative errno value otherwise. */
+int link_tmpfile(int fd, const char *path, const char *target) {
+ int r;
+
+ assert(fd >= 0);
+ assert(target);
+
+ /* Moves a temporary file created with open_tmpfile_linkable() above into its final place. If "path" is NULL an
+ * fd created with O_TMPFILE is assumed, and linkat() is used. Otherwise it is assumed O_TMPFILE is not
+ * supported on the directory, and rename_noreplace() is used instead.
+ *
+ * Note that in both cases we will not replace existing files. This is because linkat() does not support this
+ * operation currently (renameat2() does), and there is no nice way to emulate this. */
+
+ if (path) {
+ r = rename_noreplace(AT_FDCWD, path, AT_FDCWD, target);
+ if (r < 0)
+ return r;
+ } else {
+ char proc_fd_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(fd) + 1];
+
+ xsprintf(proc_fd_path, "/proc/self/fd/%i", fd);
+
+ /* AT_SYMLINK_FOLLOW makes linkat() link the file the /proc/self/fd/ symlink points to, not the symlink. */
+ if (linkat(AT_FDCWD, proc_fd_path, AT_FDCWD, target, AT_SYMLINK_FOLLOW) < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+/* Creates a directory with mkdtemp() from a private copy of 'template', or from "<tmp_dir()>/XXXXXX" if 'template'
+ * is NULL. On success returns 0 and stores the newly allocated directory name in *ret (owned by the caller);
+ * otherwise returns a negative errno value and leaves *ret untouched. */
+int mkdtemp_malloc(const char *template, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(ret);
+
+ if (template)
+ p = strdup(template);
+ else {
+ const char *tmp;
+
+ r = tmp_dir(&tmp);
+ if (r < 0)
+ return r;
+
+ p = strjoin(tmp, "/XXXXXX");
+ }
+ /* Covers both allocation paths above. */
+ if (!p)
+ return -ENOMEM;
+
+ /* mkdtemp() rewrites the template in place with the name it picked. */
+ if (!mkdtemp(p))
+ return -errno;
+
+ *ret = TAKE_PTR(p);
+ return 0;
+}
diff --git a/src/basic/tmpfile-util.h b/src/basic/tmpfile-util.h
new file mode 100644
index 0000000..802c85d
--- /dev/null
+++ b/src/basic/tmpfile-util.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdio.h>
+
+int fopen_temporary(const char *path, FILE **_f, char **_temp_path);
+int mkostemp_safe(char *pattern);
+int fmkostemp_safe(char *pattern, const char *mode, FILE**_f);
+
+int tempfn_xxxxxx(const char *p, const char *extra, char **ret);
+int tempfn_random(const char *p, const char *extra, char **ret);
+int tempfn_random_child(const char *p, const char *extra, char **ret);
+
+int open_tmpfile_unlinkable(const char *directory, int flags);
+int open_tmpfile_linkable(const char *target, int flags, char **ret_path);
+
+int link_tmpfile(int fd, const char *path, const char *target);
+
+int mkdtemp_malloc(const char *template, char **ret);
diff --git a/src/basic/umask-util.h b/src/basic/umask-util.h
new file mode 100644
index 0000000..e964292
--- /dev/null
+++ b/src/basic/umask-util.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+/* Cleanup handler behind _cleanup_umask_: sets the process umask to the value stored in *u. */
+static inline void umaskp(mode_t *u) {
+ umask(*u);
+}
+
+#define _cleanup_umask_ _cleanup_(umaskp)
+
+struct _umask_struct_ {
+ mode_t mask;
+ bool quit;
+};
+
+/* Cleanup handler used by RUN_WITH_UMASK(): puts back the umask recorded in s->mask. */
+static inline void _reset_umask_(struct _umask_struct_ *s) {
+ umask(s->mask);
+}
+
+/* Statement prefix: runs the statement or block that follows exactly once with the process umask set to 'mask'.
+ * The previous umask is captured in a loop-scoped variable whose cleanup handler (_reset_umask_) puts it back when
+ * the scope is left. The 'quit' flag makes the for loop body execute a single time. */
+#define RUN_WITH_UMASK(mask) \
+ for (_cleanup_(_reset_umask_) struct _umask_struct_ _saved_umask_ = { umask(mask), false }; \
+ !_saved_umask_.quit ; \
+ _saved_umask_.quit = true)
diff --git a/src/basic/unaligned.h b/src/basic/unaligned.h
new file mode 100644
index 0000000..00c17f8
--- /dev/null
+++ b/src/basic/unaligned.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <endian.h>
+#include <stdint.h>
+
+/* BE */
+
+/* Reads a big-endian 16-bit value from a possibly unaligned address and returns it in host byte order. */
+static inline uint16_t unaligned_read_be16(const void *_u) {
+ /* packed: no alignment is assumed for the member; may_alias: exempt from type-based aliasing assumptions */
+ const struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+ return be16toh(u->x);
+}
+
+/* Reads a big-endian 32-bit value from a possibly unaligned address and returns it in host byte order. */
+static inline uint32_t unaligned_read_be32(const void *_u) {
+ /* packed: no alignment is assumed for the member; may_alias: exempt from type-based aliasing assumptions */
+ const struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+ return be32toh(u->x);
+}
+
+/* Reads a big-endian 64-bit value from a possibly unaligned address and returns it in host byte order. */
+static inline uint64_t unaligned_read_be64(const void *_u) {
+ /* packed: no alignment is assumed for the member; may_alias: exempt from type-based aliasing assumptions */
+ const struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+ return be64toh(u->x);
+}
+
+/* Stores 'a' as a big-endian 16-bit value at a possibly unaligned address. */
+static inline void unaligned_write_be16(void *_u, uint16_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+ /* Host-to-big-endian is the conversion meant here; be16toh() performs the same swap but names the opposite
+ * direction. */
+ u->x = htobe16(a);
+}
+
+/* Stores 'a' as a big-endian 32-bit value at a possibly unaligned address. */
+static inline void unaligned_write_be32(void *_u, uint32_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+ /* Host-to-big-endian is the conversion meant here; be32toh() performs the same swap but names the opposite
+ * direction. */
+ u->x = htobe32(a);
+}
+
+/* Stores 'a' as a big-endian 64-bit value at a possibly unaligned address. */
+static inline void unaligned_write_be64(void *_u, uint64_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+ /* Host-to-big-endian is the conversion meant here; be64toh() performs the same swap but names the opposite
+ * direction. */
+ u->x = htobe64(a);
+}
+
+/* LE */
+
+/* Reads a little-endian 16-bit value from a possibly unaligned address and returns it in host byte order. */
+static inline uint16_t unaligned_read_le16(const void *_u) {
+ /* packed: no alignment is assumed for the member; may_alias: exempt from type-based aliasing assumptions */
+ const struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+ return le16toh(u->x);
+}
+
+/* Reads a little-endian 32-bit value from a possibly unaligned address and returns it in host byte order. */
+static inline uint32_t unaligned_read_le32(const void *_u) {
+ /* packed: no alignment is assumed for the member; may_alias: exempt from type-based aliasing assumptions */
+ const struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+ return le32toh(u->x);
+}
+
+/* Reads a little-endian 64-bit value from a possibly unaligned address and returns it in host byte order. */
+static inline uint64_t unaligned_read_le64(const void *_u) {
+ /* packed: no alignment is assumed for the member; may_alias: exempt from type-based aliasing assumptions */
+ const struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+ return le64toh(u->x);
+}
+
+/* Stores 'a' as a little-endian 16-bit value at a possibly unaligned address. */
+static inline void unaligned_write_le16(void *_u, uint16_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+ /* Host-to-little-endian is the conversion meant here; le16toh() performs the same swap but names the
+ * opposite direction. */
+ u->x = htole16(a);
+}
+
+/* Stores 'a' as a little-endian 32-bit value at a possibly unaligned address. */
+static inline void unaligned_write_le32(void *_u, uint32_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+ /* Host-to-little-endian is the conversion meant here; le32toh() performs the same swap but names the
+ * opposite direction. */
+ u->x = htole32(a);
+}
+
+/* Stores 'a' as a little-endian 64-bit value at a possibly unaligned address. */
+static inline void unaligned_write_le64(void *_u, uint64_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+ /* Host-to-little-endian is the conversion meant here; le64toh() performs the same swap but names the
+ * opposite direction. */
+ u->x = htole64(a);
+}
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define unaligned_read_ne16 unaligned_read_be16
+#define unaligned_read_ne32 unaligned_read_be32
+#define unaligned_read_ne64 unaligned_read_be64
+
+#define unaligned_write_ne16 unaligned_write_be16
+#define unaligned_write_ne32 unaligned_write_be32
+#define unaligned_write_ne64 unaligned_write_be64
+#else
+#define unaligned_read_ne16 unaligned_read_le16
+#define unaligned_read_ne32 unaligned_read_le32
+#define unaligned_read_ne64 unaligned_read_le64
+
+#define unaligned_write_ne16 unaligned_write_le16
+#define unaligned_write_ne32 unaligned_write_le32
+#define unaligned_write_ne64 unaligned_write_le64
+#endif
diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c
new file mode 100644
index 0000000..245daab
--- /dev/null
+++ b/src/basic/unit-def.c
@@ -0,0 +1,273 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-label.h"
+#include "string-table.h"
+#include "unit-def.h"
+#include "unit-name.h"
+
+/* Builds the D-Bus object path of a unit: "/org/freedesktop/systemd1/unit/" followed by the result of
+ * bus_label_escape(name). Returns a newly allocated string, or NULL if escaping or concatenation fails. */
+char *unit_dbus_path_from_name(const char *name) {
+ _cleanup_free_ char *e = NULL;
+
+ assert(name);
+
+ e = bus_label_escape(name);
+ if (!e)
+ return NULL;
+
+ return strappend("/org/freedesktop/systemd1/unit/", e);
+}
+
+/* Inverse of unit_dbus_path_from_name(): strips the "/org/freedesktop/systemd1/unit/" prefix from 'path' and runs
+ * the remainder through bus_label_unescape(). Returns 0 and stores the newly allocated name in *name, -EINVAL if
+ * the prefix is missing, or -ENOMEM if unescaping returns NULL. */
+int unit_name_from_dbus_path(const char *path, char **name) {
+ const char *e;
+ char *n;
+
+ e = startswith(path, "/org/freedesktop/systemd1/unit/");
+ if (!e)
+ return -EINVAL;
+
+ n = bus_label_unescape(e);
+ if (!n)
+ return -ENOMEM;
+
+ *name = n;
+ return 0;
+}
+
+/* Maps a unit type to the name of its type-specific D-Bus interface. Returns NULL for values outside
+ * [0, _UNIT_TYPE_MAX). */
+const char* unit_dbus_interface_from_type(UnitType t) {
+
+ static const char *const interfaces[_UNIT_TYPE_MAX] = {
+ [UNIT_SERVICE] = "org.freedesktop.systemd1.Service",
+ [UNIT_SOCKET] = "org.freedesktop.systemd1.Socket",
+ [UNIT_TARGET] = "org.freedesktop.systemd1.Target",
+ [UNIT_DEVICE] = "org.freedesktop.systemd1.Device",
+ [UNIT_MOUNT] = "org.freedesktop.systemd1.Mount",
+ [UNIT_AUTOMOUNT] = "org.freedesktop.systemd1.Automount",
+ [UNIT_SWAP] = "org.freedesktop.systemd1.Swap",
+ [UNIT_TIMER] = "org.freedesktop.systemd1.Timer",
+ [UNIT_PATH] = "org.freedesktop.systemd1.Path",
+ [UNIT_SLICE] = "org.freedesktop.systemd1.Slice",
+ [UNIT_SCOPE] = "org.freedesktop.systemd1.Scope",
+ };
+
+ if (t < 0 || t >= _UNIT_TYPE_MAX)
+ return NULL;
+
+ return interfaces[t];
+}
+
+/* Looks up the type-specific D-Bus interface for a unit name; NULL if unit_name_to_type() reports a negative
+ * (invalid) type. */
+const char *unit_dbus_interface_from_name(const char *name) {
+ UnitType type = unit_name_to_type(name);
+
+ return type < 0 ? NULL : unit_dbus_interface_from_type(type);
+}
+
+static const char* const unit_type_table[_UNIT_TYPE_MAX] = {
+ [UNIT_SERVICE] = "service",
+ [UNIT_SOCKET] = "socket",
+ [UNIT_TARGET] = "target",
+ [UNIT_DEVICE] = "device",
+ [UNIT_MOUNT] = "mount",
+ [UNIT_AUTOMOUNT] = "automount",
+ [UNIT_SWAP] = "swap",
+ [UNIT_TIMER] = "timer",
+ [UNIT_PATH] = "path",
+ [UNIT_SLICE] = "slice",
+ [UNIT_SCOPE] = "scope",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_type, UnitType);
+
+static const char* const unit_load_state_table[_UNIT_LOAD_STATE_MAX] = {
+ [UNIT_STUB] = "stub",
+ [UNIT_LOADED] = "loaded",
+ [UNIT_NOT_FOUND] = "not-found",
+ [UNIT_BAD_SETTING] = "bad-setting",
+ [UNIT_ERROR] = "error",
+ [UNIT_MERGED] = "merged",
+ [UNIT_MASKED] = "masked"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_load_state, UnitLoadState);
+
+static const char* const unit_active_state_table[_UNIT_ACTIVE_STATE_MAX] = {
+ [UNIT_ACTIVE] = "active",
+ [UNIT_RELOADING] = "reloading",
+ [UNIT_INACTIVE] = "inactive",
+ [UNIT_FAILED] = "failed",
+ [UNIT_ACTIVATING] = "activating",
+ [UNIT_DEACTIVATING] = "deactivating"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_active_state, UnitActiveState);
+
+static const char* const automount_state_table[_AUTOMOUNT_STATE_MAX] = {
+ [AUTOMOUNT_DEAD] = "dead",
+ [AUTOMOUNT_WAITING] = "waiting",
+ [AUTOMOUNT_RUNNING] = "running",
+ [AUTOMOUNT_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(automount_state, AutomountState);
+
+static const char* const device_state_table[_DEVICE_STATE_MAX] = {
+ [DEVICE_DEAD] = "dead",
+ [DEVICE_TENTATIVE] = "tentative",
+ [DEVICE_PLUGGED] = "plugged",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(device_state, DeviceState);
+
+static const char* const mount_state_table[_MOUNT_STATE_MAX] = {
+ [MOUNT_DEAD] = "dead",
+ [MOUNT_MOUNTING] = "mounting",
+ [MOUNT_MOUNTING_DONE] = "mounting-done",
+ [MOUNT_MOUNTED] = "mounted",
+ [MOUNT_REMOUNTING] = "remounting",
+ [MOUNT_UNMOUNTING] = "unmounting",
+ [MOUNT_REMOUNTING_SIGTERM] = "remounting-sigterm",
+ [MOUNT_REMOUNTING_SIGKILL] = "remounting-sigkill",
+ [MOUNT_UNMOUNTING_SIGTERM] = "unmounting-sigterm",
+ [MOUNT_UNMOUNTING_SIGKILL] = "unmounting-sigkill",
+ [MOUNT_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(mount_state, MountState);
+
+static const char* const path_state_table[_PATH_STATE_MAX] = {
+ [PATH_DEAD] = "dead",
+ [PATH_WAITING] = "waiting",
+ [PATH_RUNNING] = "running",
+ [PATH_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(path_state, PathState);
+
+static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
+ [SCOPE_DEAD] = "dead",
+ [SCOPE_RUNNING] = "running",
+ [SCOPE_ABANDONED] = "abandoned",
+ [SCOPE_STOP_SIGTERM] = "stop-sigterm",
+ [SCOPE_STOP_SIGKILL] = "stop-sigkill",
+ [SCOPE_FAILED] = "failed",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(scope_state, ScopeState);
+
+static const char* const service_state_table[_SERVICE_STATE_MAX] = {
+ [SERVICE_DEAD] = "dead",
+ [SERVICE_START_PRE] = "start-pre",
+ [SERVICE_START] = "start",
+ [SERVICE_START_POST] = "start-post",
+ [SERVICE_RUNNING] = "running",
+ [SERVICE_EXITED] = "exited",
+ [SERVICE_RELOAD] = "reload",
+ [SERVICE_STOP] = "stop",
+ [SERVICE_STOP_WATCHDOG] = "stop-watchdog",
+ [SERVICE_STOP_SIGTERM] = "stop-sigterm",
+ [SERVICE_STOP_SIGKILL] = "stop-sigkill",
+ [SERVICE_STOP_POST] = "stop-post",
+ [SERVICE_FINAL_SIGTERM] = "final-sigterm",
+ [SERVICE_FINAL_SIGKILL] = "final-sigkill",
+ [SERVICE_FAILED] = "failed",
+ [SERVICE_AUTO_RESTART] = "auto-restart",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_state, ServiceState);
+
+static const char* const slice_state_table[_SLICE_STATE_MAX] = {
+ [SLICE_DEAD] = "dead",
+ [SLICE_ACTIVE] = "active"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(slice_state, SliceState);
+
+static const char* const socket_state_table[_SOCKET_STATE_MAX] = {
+ [SOCKET_DEAD] = "dead",
+ [SOCKET_START_PRE] = "start-pre",
+ [SOCKET_START_CHOWN] = "start-chown",
+ [SOCKET_START_POST] = "start-post",
+ [SOCKET_LISTENING] = "listening",
+ [SOCKET_RUNNING] = "running",
+ [SOCKET_STOP_PRE] = "stop-pre",
+ [SOCKET_STOP_PRE_SIGTERM] = "stop-pre-sigterm",
+ [SOCKET_STOP_PRE_SIGKILL] = "stop-pre-sigkill",
+ [SOCKET_STOP_POST] = "stop-post",
+ [SOCKET_FINAL_SIGTERM] = "final-sigterm",
+ [SOCKET_FINAL_SIGKILL] = "final-sigkill",
+ [SOCKET_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_state, SocketState);
+
+static const char* const swap_state_table[_SWAP_STATE_MAX] = {
+ [SWAP_DEAD] = "dead",
+ [SWAP_ACTIVATING] = "activating",
+ [SWAP_ACTIVATING_DONE] = "activating-done",
+ [SWAP_ACTIVE] = "active",
+ [SWAP_DEACTIVATING] = "deactivating",
+ [SWAP_DEACTIVATING_SIGTERM] = "deactivating-sigterm",
+ [SWAP_DEACTIVATING_SIGKILL] = "deactivating-sigkill",
+ [SWAP_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(swap_state, SwapState);
+
+static const char* const target_state_table[_TARGET_STATE_MAX] = {
+ [TARGET_DEAD] = "dead",
+ [TARGET_ACTIVE] = "active"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(target_state, TargetState);
+
+static const char* const timer_state_table[_TIMER_STATE_MAX] = {
+ [TIMER_DEAD] = "dead",
+ [TIMER_WAITING] = "waiting",
+ [TIMER_RUNNING] = "running",
+ [TIMER_ELAPSED] = "elapsed",
+ [TIMER_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(timer_state, TimerState);
+
+static const char* const unit_dependency_table[_UNIT_DEPENDENCY_MAX] = {
+ [UNIT_REQUIRES] = "Requires",
+ [UNIT_REQUISITE] = "Requisite",
+ [UNIT_WANTS] = "Wants",
+ [UNIT_BINDS_TO] = "BindsTo",
+ [UNIT_PART_OF] = "PartOf",
+ [UNIT_REQUIRED_BY] = "RequiredBy",
+ [UNIT_REQUISITE_OF] = "RequisiteOf",
+ [UNIT_WANTED_BY] = "WantedBy",
+ [UNIT_BOUND_BY] = "BoundBy",
+ [UNIT_CONSISTS_OF] = "ConsistsOf",
+ [UNIT_CONFLICTS] = "Conflicts",
+ [UNIT_CONFLICTED_BY] = "ConflictedBy",
+ [UNIT_BEFORE] = "Before",
+ [UNIT_AFTER] = "After",
+ [UNIT_ON_FAILURE] = "OnFailure",
+ [UNIT_TRIGGERS] = "Triggers",
+ [UNIT_TRIGGERED_BY] = "TriggeredBy",
+ [UNIT_PROPAGATES_RELOAD_TO] = "PropagatesReloadTo",
+ [UNIT_RELOAD_PROPAGATED_FROM] = "ReloadPropagatedFrom",
+ [UNIT_JOINS_NAMESPACE_OF] = "JoinsNamespaceOf",
+ [UNIT_REFERENCES] = "References",
+ [UNIT_REFERENCED_BY] = "ReferencedBy",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_dependency, UnitDependency);
+
+/* String names of the NotifyAccess values. The entries are listed in a different order than the enumerators are
+ * declared; the designated initializers bind each string to its enumerator regardless of position. */
+static const char* const notify_access_table[_NOTIFY_ACCESS_MAX] = {
+ [NOTIFY_NONE] = "none",
+ [NOTIFY_MAIN] = "main",
+ [NOTIFY_EXEC] = "exec",
+ [NOTIFY_ALL] = "all"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(notify_access, NotifyAccess);
diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h
new file mode 100644
index 0000000..85f3e42
--- /dev/null
+++ b/src/basic/unit-def.h
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+typedef enum UnitType {
+ UNIT_SERVICE = 0,
+ UNIT_SOCKET,
+ UNIT_TARGET,
+ UNIT_DEVICE,
+ UNIT_MOUNT,
+ UNIT_AUTOMOUNT,
+ UNIT_SWAP,
+ UNIT_TIMER,
+ UNIT_PATH,
+ UNIT_SLICE,
+ UNIT_SCOPE,
+ _UNIT_TYPE_MAX,
+ _UNIT_TYPE_INVALID = -1
+} UnitType;
+
+typedef enum UnitLoadState {
+ UNIT_STUB = 0,
+ UNIT_LOADED,
+ UNIT_NOT_FOUND, /* error condition #1: unit file not found */
+ UNIT_BAD_SETTING, /* error condition #2: we couldn't parse some essential unit file setting */
+ UNIT_ERROR, /* error condition #3: other "system" error, catchall for the rest */
+ UNIT_MERGED,
+ UNIT_MASKED,
+ _UNIT_LOAD_STATE_MAX,
+ _UNIT_LOAD_STATE_INVALID = -1
+} UnitLoadState;
+
+typedef enum UnitActiveState {
+ UNIT_ACTIVE,
+ UNIT_RELOADING,
+ UNIT_INACTIVE,
+ UNIT_FAILED,
+ UNIT_ACTIVATING,
+ UNIT_DEACTIVATING,
+ _UNIT_ACTIVE_STATE_MAX,
+ _UNIT_ACTIVE_STATE_INVALID = -1
+} UnitActiveState;
+
+typedef enum AutomountState {
+ AUTOMOUNT_DEAD,
+ AUTOMOUNT_WAITING,
+ AUTOMOUNT_RUNNING,
+ AUTOMOUNT_FAILED,
+ _AUTOMOUNT_STATE_MAX,
+ _AUTOMOUNT_STATE_INVALID = -1
+} AutomountState;
+
+/* We simply watch devices, we cannot plug/unplug them. That
+ * simplifies the state engine greatly */
+typedef enum DeviceState {
+ DEVICE_DEAD,
+ DEVICE_TENTATIVE, /* mounted or swapped, but not (yet) announced by udev */
+ DEVICE_PLUGGED, /* announced by udev */
+ _DEVICE_STATE_MAX,
+ _DEVICE_STATE_INVALID = -1
+} DeviceState;
+
+typedef enum MountState {
+ MOUNT_DEAD,
+ MOUNT_MOUNTING, /* /usr/bin/mount is running, but the mount is not done yet. */
+ MOUNT_MOUNTING_DONE, /* /usr/bin/mount is running, and the mount is done. */
+ MOUNT_MOUNTED,
+ MOUNT_REMOUNTING,
+ MOUNT_UNMOUNTING,
+ MOUNT_REMOUNTING_SIGTERM,
+ MOUNT_REMOUNTING_SIGKILL,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_FAILED,
+ _MOUNT_STATE_MAX,
+ _MOUNT_STATE_INVALID = -1
+} MountState;
+
+typedef enum PathState {
+ PATH_DEAD,
+ PATH_WAITING,
+ PATH_RUNNING,
+ PATH_FAILED,
+ _PATH_STATE_MAX,
+ _PATH_STATE_INVALID = -1
+} PathState;
+
+typedef enum ScopeState {
+ SCOPE_DEAD,
+ SCOPE_RUNNING,
+ SCOPE_ABANDONED,
+ SCOPE_STOP_SIGTERM,
+ SCOPE_STOP_SIGKILL,
+ SCOPE_FAILED,
+ _SCOPE_STATE_MAX,
+ _SCOPE_STATE_INVALID = -1
+} ScopeState;
+
+typedef enum ServiceState {
+ SERVICE_DEAD,
+ SERVICE_START_PRE,
+ SERVICE_START,
+ SERVICE_START_POST,
+ SERVICE_RUNNING,
+ SERVICE_EXITED, /* Nothing is running anymore, but RemainAfterExit is true hence this is OK */
+ SERVICE_RELOAD,
+ SERVICE_STOP, /* No STOP_PRE state, instead just register multiple STOP executables */
+ SERVICE_STOP_WATCHDOG,
+ SERVICE_STOP_SIGTERM,
+ SERVICE_STOP_SIGKILL,
+ SERVICE_STOP_POST,
+ SERVICE_FINAL_SIGTERM, /* In case the STOP_POST executable hangs, we shoot that down, too */
+ SERVICE_FINAL_SIGKILL,
+ SERVICE_FAILED,
+ SERVICE_AUTO_RESTART,
+ _SERVICE_STATE_MAX,
+ _SERVICE_STATE_INVALID = -1
+} ServiceState;
+
+typedef enum SliceState {
+ SLICE_DEAD,
+ SLICE_ACTIVE,
+ _SLICE_STATE_MAX,
+ _SLICE_STATE_INVALID = -1
+} SliceState;
+
+typedef enum SocketState {
+ SOCKET_DEAD,
+ SOCKET_START_PRE,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST,
+ SOCKET_LISTENING,
+ SOCKET_RUNNING,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_PRE_SIGKILL,
+ SOCKET_STOP_POST,
+ SOCKET_FINAL_SIGTERM,
+ SOCKET_FINAL_SIGKILL,
+ SOCKET_FAILED,
+ _SOCKET_STATE_MAX,
+ _SOCKET_STATE_INVALID = -1
+} SocketState;
+
+typedef enum SwapState {
+ SWAP_DEAD,
+ SWAP_ACTIVATING, /* /sbin/swapon is running, but the swap not yet enabled. */
+ SWAP_ACTIVATING_DONE, /* /sbin/swapon is running, and the swap is done. */
+ SWAP_ACTIVE,
+ SWAP_DEACTIVATING,
+ SWAP_DEACTIVATING_SIGTERM,
+ SWAP_DEACTIVATING_SIGKILL,
+ SWAP_FAILED,
+ _SWAP_STATE_MAX,
+ _SWAP_STATE_INVALID = -1
+} SwapState;
+
+typedef enum TargetState {
+ TARGET_DEAD,
+ TARGET_ACTIVE,
+ _TARGET_STATE_MAX,
+ _TARGET_STATE_INVALID = -1
+} TargetState;
+
+typedef enum TimerState {
+ TIMER_DEAD,
+ TIMER_WAITING,
+ TIMER_RUNNING,
+ TIMER_ELAPSED,
+ TIMER_FAILED,
+ _TIMER_STATE_MAX,
+ _TIMER_STATE_INVALID = -1
+} TimerState;
+
+typedef enum UnitDependency {
+ /* Positive dependencies */
+ UNIT_REQUIRES,
+ UNIT_REQUISITE,
+ UNIT_WANTS,
+ UNIT_BINDS_TO,
+ UNIT_PART_OF,
+
+ /* Inverse of the above */
+ UNIT_REQUIRED_BY, /* inverse of 'requires' is 'required_by' */
+ UNIT_REQUISITE_OF, /* inverse of 'requisite' is 'requisite_of' */
+ UNIT_WANTED_BY, /* inverse of 'wants' */
+ UNIT_BOUND_BY, /* inverse of 'binds_to' */
+ UNIT_CONSISTS_OF, /* inverse of 'part_of' */
+
+ /* Negative dependencies */
+ UNIT_CONFLICTS, /* inverse of 'conflicts' is 'conflicted_by' */
+ UNIT_CONFLICTED_BY,
+
+ /* Order */
+ UNIT_BEFORE, /* inverse of 'before' is 'after' and vice versa */
+ UNIT_AFTER,
+
+ /* On Failure */
+ UNIT_ON_FAILURE,
+
+ /* Triggers (i.e. a socket triggers a service) */
+ UNIT_TRIGGERS,
+ UNIT_TRIGGERED_BY,
+
+ /* Propagate reloads */
+ UNIT_PROPAGATES_RELOAD_TO,
+ UNIT_RELOAD_PROPAGATED_FROM,
+
+ /* Joins namespace of */
+ UNIT_JOINS_NAMESPACE_OF,
+
+ /* Reference information for GC logic */
+ UNIT_REFERENCES, /* Inverse of 'references' is 'referenced_by' */
+ UNIT_REFERENCED_BY,
+
+ _UNIT_DEPENDENCY_MAX,
+ _UNIT_DEPENDENCY_INVALID = -1
+} UnitDependency;
+
+typedef enum NotifyAccess {
+ NOTIFY_NONE,
+ NOTIFY_ALL,
+ NOTIFY_MAIN,
+ NOTIFY_EXEC,
+ _NOTIFY_ACCESS_MAX,
+ _NOTIFY_ACCESS_INVALID = -1
+} NotifyAccess;
+
+char *unit_dbus_path_from_name(const char *name);
+int unit_name_from_dbus_path(const char *path, char **name);
+
+const char* unit_dbus_interface_from_type(UnitType t);
+const char *unit_dbus_interface_from_name(const char *name);
+
+const char *unit_type_to_string(UnitType i) _const_;
+UnitType unit_type_from_string(const char *s) _pure_;
+
+const char *unit_load_state_to_string(UnitLoadState i) _const_;
+UnitLoadState unit_load_state_from_string(const char *s) _pure_;
+
+const char *unit_active_state_to_string(UnitActiveState i) _const_;
+UnitActiveState unit_active_state_from_string(const char *s) _pure_;
+
+const char* automount_state_to_string(AutomountState i) _const_;
+AutomountState automount_state_from_string(const char *s) _pure_;
+
+const char* device_state_to_string(DeviceState i) _const_;
+DeviceState device_state_from_string(const char *s) _pure_;
+
+const char* mount_state_to_string(MountState i) _const_;
+MountState mount_state_from_string(const char *s) _pure_;
+
+const char* path_state_to_string(PathState i) _const_;
+PathState path_state_from_string(const char *s) _pure_;
+
+const char* scope_state_to_string(ScopeState i) _const_;
+ScopeState scope_state_from_string(const char *s) _pure_;
+
+const char* service_state_to_string(ServiceState i) _const_;
+ServiceState service_state_from_string(const char *s) _pure_;
+
+const char* slice_state_to_string(SliceState i) _const_;
+SliceState slice_state_from_string(const char *s) _pure_;
+
+const char* socket_state_to_string(SocketState i) _const_;
+SocketState socket_state_from_string(const char *s) _pure_;
+
+const char* swap_state_to_string(SwapState i) _const_;
+SwapState swap_state_from_string(const char *s) _pure_;
+
+const char* target_state_to_string(TargetState i) _const_;
+TargetState target_state_from_string(const char *s) _pure_;
+
+const char *timer_state_to_string(TimerState i) _const_;
+TimerState timer_state_from_string(const char *s) _pure_;
+
+const char *unit_dependency_to_string(UnitDependency i) _const_;
+UnitDependency unit_dependency_from_string(const char *s) _pure_;
+
+const char* notify_access_to_string(NotifyAccess i) _const_;
+NotifyAccess notify_access_from_string(const char *s) _pure_;
diff --git a/src/basic/unit-name.c b/src/basic/unit-name.c
new file mode 100644
index 0000000..1b81fe2
--- /dev/null
+++ b/src/basic/unit-name.c
@@ -0,0 +1,775 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "glob-util.h"
+#include "hexdecoct.h"
+#include "path-util.h"
+#include "special.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+/* Characters valid in a unit name. */
+#define VALID_CHARS \
+ DIGITS \
+ LETTERS \
+ ":-_.\\"
+
+/* The same, but also permits the single @ character that may appear */
+#define VALID_CHARS_WITH_AT \
+ "@" \
+ VALID_CHARS
+
+/* All chars valid in a unit name glob */
+#define VALID_CHARS_GLOB \
+ VALID_CHARS_WITH_AT \
+ "[]!-*?"
+
+bool unit_name_is_valid(const char *n, UnitNameFlags flags) {
+        const char *dot, *c, *first_at = NULL;
+
+        /* Checks whether n is a well-formed unit name of one of the kinds selected by flags: plain
+         * ("foo.service"), instance ("foo@bar.service") or template ("foo@.service"). */
+
+        assert((flags & ~(UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE)) == 0);
+
+        if (_unlikely_(flags == 0))
+                return false;
+
+        if (isempty(n) || strlen(n) >= UNIT_NAME_MAX)
+                return false;
+
+        /* The type suffix begins at the last dot, which may not be the very first character */
+        dot = strrchr(n, '.');
+        if (!dot || dot == n)
+                return false;
+
+        if (unit_type_from_string(dot + 1) < 0)
+                return false;
+
+        /* Everything in front of the suffix must be from the permitted charset; remember the first '@' */
+        for (c = n; c < dot; c++) {
+                if (!strchr(VALID_CHARS_WITH_AT, *c))
+                        return false;
+
+                if (*c == '@' && !first_at)
+                        first_at = c;
+        }
+
+        /* An empty prefix is never acceptable */
+        if (first_at == n)
+                return false;
+
+        if (!first_at)
+                return (flags & UNIT_NAME_PLAIN) != 0;
+
+        /* With an '@' present: nothing between '@' and the suffix means template, otherwise instance */
+        if (dot == first_at + 1)
+                return (flags & UNIT_NAME_TEMPLATE) != 0;
+
+        return (flags & UNIT_NAME_INSTANCE) != 0;
+}
+
+bool unit_prefix_is_valid(const char *p) {
+        /* A prefix has to be non-empty and, unlike an instance string, may not contain any '@' */
+        return !isempty(p) && in_charset(p, VALID_CHARS);
+}
+
+bool unit_instance_is_valid(const char *i) {
+        /* An instance string has to be non-empty. In contrast to the prefix it may carry additional '@'
+         * characters. No length check is done here, since the permitted length depends on the rest of
+         * the unit name. */
+        return !isempty(i) && in_charset(i, VALID_CHARS_WITH_AT);
+}
+
+bool unit_suffix_is_valid(const char *s) {
+        /* A suffix is a dot immediately followed by a string unit_type_from_string() recognizes */
+        return !isempty(s) &&
+                s[0] == '.' &&
+                unit_type_from_string(s + 1) >= 0;
+}
+
+int unit_name_to_prefix(const char *n, char **ret) {
+        const char *end;
+        char *prefix;
+
+        /* Stores a newly allocated copy of the part of n in front of the first '@' (or, if there is
+         * none, in front of the last '.') in *ret. Returns 0 on success, -EINVAL if n is not a valid
+         * unit name, -ENOMEM on allocation failure. */
+
+        assert(n);
+        assert(ret);
+
+        if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+                return -EINVAL;
+
+        end = strchr(n, '@');
+        if (!end)
+                end = strrchr(n, '.');
+
+        assert_se(end);
+
+        prefix = strndup(n, end - n);
+        if (!prefix)
+                return -ENOMEM;
+
+        *ret = prefix;
+        return 0;
+}
+
+int unit_name_to_instance(const char *n, char **instance) {
+        const char *at, *dot;
+        char *copy;
+
+        /* Extracts the instance string of n. Returns 1 and a newly allocated string in *instance if n
+         * contains an '@' (the string is empty for templates), 0 and NULL if it does not, -EINVAL if n
+         * is not a valid unit name, -ENOMEM on allocation failure. */
+
+        assert(n);
+        assert(instance);
+
+        if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+                return -EINVAL;
+
+        at = strchr(n, '@');
+        if (!at) {
+                *instance = NULL;
+                return 0;
+        }
+
+        /* The instance spans from right after the first '@' up to the last '.' */
+        dot = strrchr(at + 1, '.');
+        if (!dot)
+                return -EINVAL;
+
+        copy = strndup(at + 1, dot - (at + 1));
+        if (!copy)
+                return -ENOMEM;
+
+        *instance = copy;
+        return 1;
+}
+
+int unit_name_to_prefix_and_instance(const char *n, char **ret) {
+        const char *dot;
+        char *stem;
+
+        /* Stores a newly allocated copy of n with the type suffix (everything from the last '.' on)
+         * cut off in *ret. Returns 0 on success, -EINVAL for invalid names, -ENOMEM on allocation
+         * failure. */
+
+        assert(n);
+        assert(ret);
+
+        if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+                return -EINVAL;
+
+        dot = strrchr(n, '.');
+        if (!dot)
+                return -EINVAL;
+
+        stem = strndup(n, dot - n);
+        if (!stem)
+                return -ENOMEM;
+
+        *ret = stem;
+        return 0;
+}
+
+UnitType unit_name_to_type(const char *n) {
+        const char *dot;
+
+        /* Maps the suffix of a valid unit name to its UnitType; _UNIT_TYPE_INVALID for invalid names */
+
+        assert(n);
+
+        if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+                return _UNIT_TYPE_INVALID;
+
+        dot = strrchr(n, '.');
+        assert_se(dot);
+
+        return unit_type_from_string(dot + 1);
+}
+
+int unit_name_change_suffix(const char *n, const char *suffix, char **ret) {
+        const char *dot;
+        size_t stem_len;
+        char *renamed;
+
+        /* Stores a newly allocated copy of n whose type suffix is replaced by suffix (which includes
+         * the leading dot) in *ret. Returns 0 on success, -EINVAL if either input is invalid, -ENOMEM
+         * on allocation failure. */
+
+        assert(n);
+        assert(suffix);
+        assert(ret);
+
+        if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+                return -EINVAL;
+
+        if (!unit_suffix_is_valid(suffix))
+                return -EINVAL;
+
+        dot = strrchr(n, '.');
+        assert_se(dot);
+
+        stem_len = dot - n;
+
+        renamed = new(char, stem_len + strlen(suffix) + 1);
+        if (!renamed)
+                return -ENOMEM;
+
+        memcpy(renamed, n, stem_len);
+        strcpy(renamed + stem_len, suffix);
+
+        *ret = renamed;
+        return 0;
+}
+
+int unit_name_build(const char *prefix, const char *instance, const char *suffix, char **ret) {
+        UnitType t;
+
+        /* Like unit_name_build_from_type(), but takes the unit type as a ".type" suffix string.
+         * Returns -EINVAL if the suffix lacks the leading dot or names no known type. */
+
+        assert(prefix);
+        assert(suffix);
+        assert(ret);
+
+        if (suffix[0] != '.')
+                return -EINVAL;
+
+        t = unit_type_from_string(suffix + 1);
+        if (t < 0)
+                return -EINVAL;
+
+        return unit_name_build_from_type(prefix, instance, t, ret);
+}
+
+int unit_name_build_from_type(const char *prefix, const char *instance, UnitType type, char **ret) {
+        const char *type_name;
+        char *name;
+
+        /* Assembles "prefix.type" or, if instance is non-NULL, "prefix@instance.type" into a newly
+         * allocated string stored in *ret. Returns 0 on success, -EINVAL if prefix or instance are
+         * not valid, -ENOMEM on allocation failure. */
+
+        assert(prefix);
+        assert(type >= 0);
+        assert(type < _UNIT_TYPE_MAX);
+        assert(ret);
+
+        if (!unit_prefix_is_valid(prefix))
+                return -EINVAL;
+
+        if (instance && !unit_instance_is_valid(instance))
+                return -EINVAL;
+
+        type_name = unit_type_to_string(type);
+
+        if (instance)
+                name = strjoin(prefix, "@", instance, ".", type_name);
+        else
+                name = strjoin(prefix, ".", type_name);
+        if (!name)
+                return -ENOMEM;
+
+        *ret = name;
+        return 0;
+}
+
+static char *do_escape_char(char c, char *t) {
+        /* Writes the four characters "\xNN" for c at t and returns the position right behind them */
+
+        assert(t);
+
+        t[0] = '\\';
+        t[1] = 'x';
+        t[2] = hexchar(c >> 4);
+        t[3] = hexchar(c);
+
+        return t + 4;
+}
+
+static char *do_escape(const char *f, char *t) {
+        /* Escapes f into the buffer at t and returns the end of the written output (no NUL byte is
+         * appended). '/' becomes '-'; '-', '\' and anything outside VALID_CHARS become "\xNN". */
+
+        assert(f);
+        assert(t);
+
+        /* do not create units with a leading '.', like for "/.dotdir" mount points */
+        if (*f == '.')
+                t = do_escape_char(*(f++), t);
+
+        while (*f) {
+                char c = *(f++);
+
+                if (c == '/')
+                        *(t++) = '-';
+                else if (c == '-' || c == '\\' || !strchr(VALID_CHARS, c))
+                        t = do_escape_char(c, t);
+                else
+                        *(t++) = c;
+        }
+
+        return t;
+}
+
+char *unit_name_escape(const char *f) {
+        char *escaped, *end;
+
+        /* Returns a newly allocated, escaped copy of f, or NULL on allocation failure */
+
+        assert(f);
+
+        /* Worst case every input character expands to the four characters of "\xNN" */
+        escaped = new(char, strlen(f)*4+1);
+        if (!escaped)
+                return NULL;
+
+        end = do_escape(f, escaped);
+        *end = 0;
+
+        return escaped;
+}
+
+int unit_name_unescape(const char *f, char **ret) {
+        _cleanup_free_ char *r = NULL;
+        char *t;
+
+        /* Reverses unit_name_escape(): '-' becomes '/', "\xNN" becomes the byte with hex value NN, all
+         * other characters are copied verbatim. On success stores a newly allocated string in *ret and
+         * returns 0. Returns -EINVAL on a malformed escape sequence, -ENOMEM on allocation failure. */
+
+        assert(f);
+        assert(ret); /* *ret is written unconditionally below */
+
+        /* The unescaped string is never longer than the input, hence a copy is large enough */
+        r = strdup(f);
+        if (!r)
+                return -ENOMEM;
+
+        for (t = r; *f; f++) {
+                if (*f == '-')
+                        *(t++) = '/';
+                else if (*f == '\\') {
+                        int a, b;
+
+                        if (f[1] != 'x')
+                                return -EINVAL;
+
+                        /* A premature NUL is rejected here before the next byte is looked at */
+                        a = unhexchar(f[2]);
+                        if (a < 0)
+                                return -EINVAL;
+
+                        b = unhexchar(f[3]);
+                        if (b < 0)
+                                return -EINVAL;
+
+                        *(t++) = (char) (((uint8_t) a << 4U) | (uint8_t) b);
+                        f += 3;
+                } else
+                        *(t++) = *f;
+        }
+
+        *t = 0;
+
+        *ret = TAKE_PTR(r);
+
+        return 0;
+}
+
+int unit_name_path_escape(const char *f, char **ret) {
+        _cleanup_free_ char *p = NULL;
+        char *s;
+
+        /* Escapes the file system path f for use in a unit name: the path is simplified, leading and
+         * trailing slashes are dropped and the rest is passed through unit_name_escape(). The root
+         * directory (or an empty path) becomes "-". On success stores a newly allocated string in *ret
+         * and returns 0. Returns -EINVAL if the path is not normalized, -ENOMEM on allocation failure. */
+
+        assert(f);
+        assert(ret);
+
+        /* Work on a heap copy: f may be arbitrarily long and under the control of the caller, hence a
+         * stack copy via strdupa() could overflow the stack (and its result cannot be checked). */
+        p = strdup(f);
+        if (!p)
+                return -ENOMEM;
+
+        path_simplify(p, false);
+
+        if (empty_or_root(p))
+                s = strdup("-");
+        else {
+                if (!path_is_normalized(p))
+                        return -EINVAL;
+
+                /* Truncate trailing slashes */
+                delete_trailing_chars(p, "/");
+
+                /* Skip leading slashes. p itself is left untouched so that the cleanup handler frees
+                 * the start of the allocation. */
+                s = unit_name_escape(skip_leading_chars(p, "/"));
+        }
+        if (!s)
+                return -ENOMEM;
+
+        *ret = s;
+        return 0;
+}
+
+int unit_name_path_unescape(const char *f, char **ret) {
+        _cleanup_free_ char *unescaped = NULL;
+        char *path;
+        int r;
+
+        /* Reverses unit_name_path_escape(): "-" maps to "/", anything else is unescaped and prefixed
+         * with a slash. ret may be NULL if the caller only wants to validate f. Returns 0 on success,
+         * -EINVAL if f is empty or does not decode to a normalized path, -ENOMEM on allocation failure. */
+
+        assert(f);
+
+        if (isempty(f))
+                return -EINVAL;
+
+        if (streq(f, "-")) {
+                path = strdup("/");
+                if (!path)
+                        return -ENOMEM;
+        } else {
+                r = unit_name_unescape(f, &unescaped);
+                if (r < 0)
+                        return r;
+
+                /* Don't accept trailing or leading slashes */
+                if (startswith(unescaped, "/") || endswith(unescaped, "/"))
+                        return -EINVAL;
+
+                /* Prefix a slash again */
+                path = strappend("/", unescaped);
+                if (!path)
+                        return -ENOMEM;
+
+                if (!path_is_normalized(path)) {
+                        free(path);
+                        return -EINVAL;
+                }
+        }
+
+        if (ret)
+                *ret = path;
+        else
+                free(path);
+
+        return 0;
+}
+
+int unit_name_replace_instance(const char *f, const char *i, char **ret) {
+        const char *at, *dot;
+        size_t head_len, instance_len;
+        char *name, *w;
+
+        /* Builds a copy of the instance or template name f with its instance string replaced by i and
+         * stores it in *ret. Returns 0 on success, -EINVAL if f or i are invalid, -ENOMEM on
+         * allocation failure. */
+
+        assert(f);
+        assert(i);
+        assert(ret);
+
+        if (!unit_name_is_valid(f, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE))
+                return -EINVAL;
+        if (!unit_instance_is_valid(i))
+                return -EINVAL;
+
+        at = strchr(f, '@');
+        assert_se(at);
+        dot = strrchr(f, '.');
+        assert_se(dot);
+
+        head_len = (at - f) + 1; /* the prefix, including the '@' */
+        instance_len = strlen(i);
+
+        name = new(char, head_len + instance_len + strlen(dot) + 1);
+        if (!name)
+                return -ENOMEM;
+
+        w = mempcpy(name, f, head_len);
+        w = mempcpy(w, i, instance_len);
+        strcpy(w, dot);
+
+        *ret = name;
+        return 0;
+}
+
+int unit_name_template(const char *f, char **ret) {
+        const char *at, *dot;
+        size_t head_len;
+        char *name;
+
+        /* Turns the instance or template name f into the matching template name ("foo@bar.service"
+         * becomes "foo@.service") and stores it in *ret. Returns 0 on success, -EINVAL if f is neither
+         * an instance nor a template name, -ENOMEM on allocation failure. */
+
+        assert(f);
+        assert(ret);
+
+        if (!unit_name_is_valid(f, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE))
+                return -EINVAL;
+
+        at = strchr(f, '@');
+        assert_se(at);
+        dot = strrchr(f, '.');
+        assert_se(dot);
+
+        head_len = (at - f) + 1; /* the prefix, including the '@' */
+
+        name = new(char, head_len + strlen(dot) + 1);
+        if (!name)
+                return -ENOMEM;
+
+        memcpy(name, f, head_len);
+        strcpy(name + head_len, dot);
+
+        *ret = name;
+        return 0;
+}
+
+int unit_name_from_path(const char *path, const char *suffix, char **ret) {
+        _cleanup_free_ char *escaped = NULL;
+        char *name;
+        int r;
+
+        /* Builds the unit name "<escaped path><suffix>" for the given file system path and stores it
+         * in *ret. Returns 0 on success, -EINVAL for an invalid suffix, -ENOMEM on allocation failure
+         * or the error of unit_name_path_escape(). */
+
+        assert(path);
+        assert(suffix);
+        assert(ret);
+
+        if (!unit_suffix_is_valid(suffix))
+                return -EINVAL;
+
+        r = unit_name_path_escape(path, &escaped);
+        if (r < 0)
+                return r;
+
+        name = strappend(escaped, suffix);
+        if (!name)
+                return -ENOMEM;
+
+        *ret = name;
+        return 0;
+}
+
+int unit_name_from_path_instance(const char *prefix, const char *path, const char *suffix, char **ret) {
+        _cleanup_free_ char *escaped = NULL;
+        char *name;
+        int r;
+
+        /* Builds the instance unit name "<prefix>@<escaped path><suffix>" and stores it in *ret.
+         * Returns 0 on success, -EINVAL for an invalid prefix or suffix, -ENOMEM on allocation failure
+         * or the error of unit_name_path_escape(). */
+
+        assert(prefix);
+        assert(path);
+        assert(suffix);
+        assert(ret);
+
+        if (!unit_prefix_is_valid(prefix))
+                return -EINVAL;
+
+        if (!unit_suffix_is_valid(suffix))
+                return -EINVAL;
+
+        r = unit_name_path_escape(path, &escaped);
+        if (r < 0)
+                return r;
+
+        name = strjoin(prefix, "@", escaped, suffix);
+        if (!name)
+                return -ENOMEM;
+
+        *ret = name;
+        return 0;
+}
+
+int unit_name_to_path(const char *name, char **ret) {
+        _cleanup_free_ char *escaped_path = NULL;
+        int r;
+
+        /* Converts the prefix of the unit name back into the file system path it was generated from */
+
+        assert(name);
+
+        r = unit_name_to_prefix(name, &escaped_path);
+        if (r < 0)
+                return r;
+
+        return unit_name_path_unescape(escaped_path, ret);
+}
+
+static bool do_escape_mangle(const char *f, bool allow_globs, char *t) {
+        const char *allowed;
+        bool changed = false;
+
+        /* Copies f to t (NUL terminated), replacing '/' by '-' and escaping every character that is
+         * not in the permitted set as "\xNN". We'll only escape the obvious characters here, to play
+         * safe.
+         *
+         * Returns true if any characters were mangled, false otherwise. */
+
+        assert(f);
+        assert(t);
+
+        allowed = allow_globs ? VALID_CHARS_GLOB : VALID_CHARS_WITH_AT;
+
+        for (; *f; f++) {
+                if (*f != '/' && strchr(allowed, *f)) {
+                        /* Fine as it is */
+                        *(t++) = *f;
+                        continue;
+                }
+
+                if (*f == '/')
+                        *(t++) = '-';
+                else
+                        t = do_escape_char(*f, t);
+
+                changed = true;
+        }
+
+        *t = 0;
+
+        return changed;
+}
+
+/**
+ * Convert a string to a unit name. /dev/blah is converted to dev-blah.device,
+ * /blah/blah is converted to blah-blah.mount, anything else is left alone,
+ * except that @suffix is appended if a valid unit suffix is not present.
+ *
+ * If UNIT_NAME_MANGLE_GLOB is set in @flags, glob characters are preserved. Otherwise, they are escaped.
+ */
+int unit_name_mangle_with_suffix(const char *name, UnitNameMangle flags, const char *suffix, char **ret) {
+        bool globbing, changed;
+        char *out;
+        int r;
+
+        /* Stores the resulting, newly allocated name in *ret. Returns 0 if name was taken over
+         * unmodified, 1 if it was converted in some way, negative errno-style value on failure. */
+
+        assert(name);
+        assert(suffix);
+        assert(ret);
+
+        if (isempty(name)) /* We cannot mangle empty unit names to become valid, sorry. */
+                return -EINVAL;
+
+        if (!unit_suffix_is_valid(suffix))
+                return -EINVAL;
+
+        globbing = flags & UNIT_NAME_MANGLE_GLOB;
+
+        /* Already a fully valid unit name, or (if globbing is permitted) a fully valid globbing
+         * expression? If so, no mangling is necessary... */
+        if (unit_name_is_valid(name, UNIT_NAME_ANY) ||
+            (globbing && string_is_glob(name) && in_charset(name, VALID_CHARS_GLOB))) {
+
+                out = strdup(name);
+                if (!out)
+                        return -ENOMEM;
+
+                *ret = out;
+                return 0;
+        }
+
+        /* Paths are turned into device or mount units. If that fails with -EINVAL we fall through to
+         * the generic escaping below. */
+        if (is_device_path(name)) {
+                r = unit_name_from_path(name, ".device", ret);
+                if (r >= 0)
+                        return 1;
+                if (r != -EINVAL)
+                        return r;
+        }
+
+        if (path_is_absolute(name)) {
+                r = unit_name_from_path(name, ".mount", ret);
+                if (r >= 0)
+                        return 1;
+                if (r != -EINVAL)
+                        return r;
+        }
+
+        /* Every character may expand to four, plus room for the suffix and the trailing NUL */
+        out = new(char, strlen(name) * 4 + strlen(suffix) + 1);
+        if (!out)
+                return -ENOMEM;
+
+        changed = do_escape_mangle(name, globbing, out);
+        if (changed)
+                log_full(flags & UNIT_NAME_MANGLE_WARN ? LOG_NOTICE : LOG_DEBUG,
+                         "Invalid unit name \"%s\" was escaped as \"%s\" (maybe you should use systemd-escape?)",
+                         name, out);
+
+        /* Append a suffix if it doesn't have any, but only if this is not a glob, so that we can allow
+         * "foo.*" as a valid glob. */
+        if ((!globbing || !string_is_glob(out)) && unit_name_to_type(out) < 0)
+                strcat(out, suffix);
+
+        *ret = out;
+        return 1;
+}
+
+int slice_build_parent_slice(const char *slice, char **ret) {
+        const char *last_dash;
+        char *parent;
+
+        /* Determines the parent of the given slice unit: "a-b-c.slice" yields "a-b.slice", a slice
+         * without any dash yields the root slice. Returns 1 and a newly allocated name in *ret, or 0
+         * and NULL if slice is the root slice itself. Returns -EINVAL for invalid slice names, -ENOMEM
+         * on allocation failure. */
+
+        assert(slice);
+        assert(ret);
+
+        if (!slice_name_is_valid(slice))
+                return -EINVAL;
+
+        if (streq(slice, SPECIAL_ROOT_SLICE)) {
+                *ret = NULL;
+                return 0;
+        }
+
+        last_dash = strrchr(slice, '-');
+        if (!last_dash)
+                parent = strdup(SPECIAL_ROOT_SLICE);
+        else {
+                size_t stem_len = last_dash - slice;
+
+                /* Everything in front of the last dash, followed by the suffix */
+                parent = new(char, stem_len + strlen(".slice") + 1);
+                if (parent)
+                        strcpy(mempcpy(parent, slice, stem_len), ".slice");
+        }
+        if (!parent)
+                return -ENOMEM;
+
+        *ret = parent;
+        return 1;
+}
+
+int slice_build_subslice(const char *slice, const char *name, char **ret) {
+        char *subslice;
+
+        /* Builds the name of the child slice called name below slice: "<name>.slice" below the root
+         * slice, "<slice stem>-<name>.slice" otherwise. On success stores a newly allocated string in
+         * *ret and returns 0. Returns -EINVAL if slice or name are invalid, -ENOMEM on allocation
+         * failure. */
+
+        assert(slice);
+        assert(name);
+        assert(ret);
+
+        if (!slice_name_is_valid(slice))
+                return -EINVAL;
+
+        if (!unit_prefix_is_valid(name))
+                return -EINVAL;
+
+        if (streq(slice, SPECIAL_ROOT_SLICE))
+                subslice = strappend(name, ".slice");
+        else {
+                char *e;
+
+                assert_se(e = endswith(slice, ".slice"));
+
+                /* stem + "-" + name + ".slice" (6 characters) + NUL */
+                subslice = new(char, (e - slice) + 1 + strlen(name) + 6 + 1);
+                if (subslice)
+                        stpcpy(stpcpy(stpcpy(mempcpy(subslice, slice, e - slice), "-"), name), ".slice");
+        }
+
+        /* Both branches allocate, hence check the result for both of them */
+        if (!subslice)
+                return -ENOMEM;
+
+        *ret = subslice;
+        return 0;
+}
+
+bool slice_name_is_valid(const char *name) {
+        const char *p, *e;
+        bool prev_dash = false;
+
+        /* A slice name is a plain unit name ending in ".slice" whose dash-separated components are all
+         * non-empty. The root slice is always valid. */
+
+        if (!unit_name_is_valid(name, UNIT_NAME_PLAIN))
+                return false;
+
+        if (streq(name, SPECIAL_ROOT_SLICE))
+                return true;
+
+        e = endswith(name, ".slice");
+        if (!e)
+                return false;
+
+        for (p = name; p < e; p++) {
+                bool is_dash = *p == '-';
+
+                /* Don't allow an initial dash, nor multiple dashes in a row */
+                if (is_dash && (p == name || prev_dash))
+                        return false;
+
+                prev_dash = is_dash;
+        }
+
+        /* Don't allow a trailing dash */
+        return !prev_dash;
+}
diff --git a/src/basic/unit-name.h b/src/basic/unit-name.h
new file mode 100644
index 0000000..0629db3
--- /dev/null
+++ b/src/basic/unit-name.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+#include "unit-def.h"
+
+#define UNIT_NAME_MAX 256
+
+typedef enum UnitNameFlags {
+ UNIT_NAME_PLAIN = 1 << 0, /* Allow foo.service */
+ UNIT_NAME_INSTANCE = 1 << 1, /* Allow foo@bar.service */
+ UNIT_NAME_TEMPLATE = 1 << 2, /* Allow foo@.service */
+ UNIT_NAME_ANY = UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE,
+} UnitNameFlags;
+
+bool unit_name_is_valid(const char *n, UnitNameFlags flags) _pure_;
+bool unit_prefix_is_valid(const char *p) _pure_;
+bool unit_instance_is_valid(const char *i) _pure_;
+bool unit_suffix_is_valid(const char *s) _pure_;
+
+static inline bool unit_prefix_and_instance_is_valid(const char *p) {
+        /* For prefix+instance and instance the same rules apply. Returns bool like the other
+         * unit_*_is_valid() predicates declared above. */
+        return unit_instance_is_valid(p);
+}
+
+int unit_name_to_prefix(const char *n, char **prefix);
+int unit_name_to_instance(const char *n, char **instance);
+int unit_name_to_prefix_and_instance(const char *n, char **ret);
+
+UnitType unit_name_to_type(const char *n) _pure_;
+
+int unit_name_change_suffix(const char *n, const char *suffix, char **ret);
+
+int unit_name_build(const char *prefix, const char *instance, const char *suffix, char **ret);
+int unit_name_build_from_type(const char *prefix, const char *instance, UnitType, char **ret);
+
+char *unit_name_escape(const char *f);
+int unit_name_unescape(const char *f, char **ret);
+int unit_name_path_escape(const char *f, char **ret);
+int unit_name_path_unescape(const char *f, char **ret);
+
+int unit_name_replace_instance(const char *f, const char *i, char **ret);
+
+int unit_name_template(const char *f, char **ret);
+
+int unit_name_from_path(const char *path, const char *suffix, char **ret);
+int unit_name_from_path_instance(const char *prefix, const char *path, const char *suffix, char **ret);
+int unit_name_to_path(const char *name, char **ret);
+
+typedef enum UnitNameMangle {
+ UNIT_NAME_MANGLE_GLOB = 1 << 0,
+ UNIT_NAME_MANGLE_WARN = 1 << 1,
+} UnitNameMangle;
+
+int unit_name_mangle_with_suffix(const char *name, UnitNameMangle flags, const char *suffix, char **ret);
+
+static inline int unit_name_mangle(const char *name, UnitNameMangle flags, char **ret) {
+ return unit_name_mangle_with_suffix(name, flags, ".service", ret);
+}
+
+int slice_build_parent_slice(const char *slice, char **ret);
+int slice_build_subslice(const char *slice, const char *name, char **subslice);
+bool slice_name_is_valid(const char *name);
diff --git a/src/basic/user-util.c b/src/basic/user-util.c
new file mode 100644
index 0000000..260f3d2
--- /dev/null
+++ b/src/basic/user-util.c
@@ -0,0 +1,857 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <limits.h>
+#include <pwd.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <utmp.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "utf8.h"
+
+bool uid_is_valid(uid_t uid) {
+        /* Also see POSIX IEEE Std 1003.1-2008, 2016 Edition, 3.436.
+         *
+         * Two values are refused: the 32bit -1, which some libc APIs use as special placeholder
+         * (UID_INVALID), and the 16bit -1, since a long time ago UIDs were 16bit. */
+        return uid != (uid_t) UINT32_C(0xFFFFFFFF) &&
+                uid != (uid_t) UINT32_C(0xFFFF);
+}
+
+int parse_uid(const char *s, uid_t *ret) {
+        uint32_t parsed = 0;
+        int r;
+
+        /* Parses s as a numeric UID. Returns 0 and stores the value in *ret (if non-NULL) on success,
+         * the error of safe_atou32() if s cannot be parsed, and -ENXIO if it parses but is one of the
+         * invalid UIDs. */
+
+        assert(s);
+
+        assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+        r = safe_atou32(s, &parsed);
+        if (r < 0)
+                return r;
+
+        /* We return ENXIO instead of EINVAL here, to make it easy to distinguish invalid numeric UIDs
+         * from invalid strings. */
+        if (!uid_is_valid(parsed))
+                return -ENXIO;
+
+        if (ret)
+                *ret = parsed;
+
+        return 0;
+}
+
+char* getlogname_malloc(void) {
+        struct stat st;
+
+        /* If stdin is a TTY we can stat, report the name of its owner, otherwise of our real UID */
+        if (isatty(STDIN_FILENO) && fstat(STDIN_FILENO, &st) >= 0)
+                return uid_to_name(st.st_uid);
+
+        return uid_to_name(getuid());
+}
+
+char *getusername_malloc(void) {
+        const char *from_env;
+
+        /* Prefers $USER, falls back to the name of our real UID. Returns a newly allocated string. */
+        from_env = getenv("USER");
+
+        return from_env ? strdup(from_env) : uid_to_name(getuid());
+}
+
+static bool is_nologin_shell(const char *shell) {
+
+        /* Checks whether the shell is one of the binaries used to disable logins for an account.
+         *
+         * 'nologin' is the friendliest way to do that: it prints a nice message and exits. Different
+         * distributions place the binary at different places though, hence let's list them all.
+         *
+         * 'true' and 'false' work too for the same purpose, but are less friendly as they don't do any
+         * message printing. Different distributions place the binary at various places but at least not
+         * in the 'sbin' directory. */
+
+        return PATH_IN_SET(shell,
+                           "/bin/nologin",
+                           "/sbin/nologin",
+                           "/usr/bin/nologin",
+                           "/usr/sbin/nologin",
+                           "/bin/false",
+                           "/usr/bin/false",
+                           "/bin/true",
+                           "/usr/bin/true");
+}
+
+static int synthesize_user_creds(
+                const char **username,
+                uid_t *uid, gid_t *gid,
+                const char **home,
+                const char **shell,
+                UserCredsFlags flags) {
+
+        /* We enforce some special rules for uid=0 and uid=65534: in order to avoid NSS lookups we
+         * hardcode their user record data. Returns 0 if *username was one of them (by name or numeric
+         * ID) and the requested fields were filled in, -ENOMEDIUM otherwise. */
+
+        if (STR_IN_SET(*username, "root", "0")) {
+                *username = "root";
+
+                if (uid)
+                        *uid = 0;
+                if (gid)
+                        *gid = 0;
+                if (home)
+                        *home = "/root";
+                if (shell)
+                        *shell = "/bin/sh";
+
+                return 0;
+        }
+
+        if (!synthesize_nobody() ||
+            !STR_IN_SET(*username, NOBODY_USER_NAME, "65534"))
+                return -ENOMEDIUM;
+
+        *username = NOBODY_USER_NAME;
+
+        if (uid)
+                *uid = UID_NOBODY;
+        if (gid)
+                *gid = GID_NOBODY;
+
+        /* With USER_CREDS_CLEAN the home directory and shell of nobody are reported as unset */
+        if (home)
+                *home = FLAGS_SET(flags, USER_CREDS_CLEAN) ? NULL : "/";
+        if (shell)
+                *shell = FLAGS_SET(flags, USER_CREDS_CLEAN) ? NULL : "/sbin/nologin";
+
+        return 0;
+}
+
+int get_user_creds(
+ const char **username,
+ uid_t *uid, gid_t *gid,
+ const char **home,
+ const char **shell,
+ UserCredsFlags flags) {
+ /* Resolves the user given in *username (a user name or a numeric UID string) and fills in
+ * whichever of uid, gid, home and shell are non-NULL. *username may be changed to point to the
+ * name found. The strings handed back are not copies: they are string constants or point into
+ * the struct passwd returned by getpwuid()/getpwnam(). Returns 0 on success and a negative
+ * errno-style value on failure (-ESRCH if no record was found and errno was not set, -EBADMSG
+ * if the record carries an invalid UID or GID). */
+ uid_t u = UID_INVALID;
+ struct passwd *p;
+ int r;
+
+ assert(username);
+ assert(*username);
+
+ if (!FLAGS_SET(flags, USER_CREDS_PREFER_NSS) ||
+ (!home && !shell)) {
+
+ /* So here's the deal: normally, we'll try to synthesize all records we can synthesize, and override
+ * the user database with that. However, if the user specifies USER_CREDS_PREFER_NSS then the
+ * user database will override the synthetic records instead — except if the user is only interested in
+ * the UID and/or GID (but not the home directory, or the shell), in which case we'll always override
+ * the user database (i.e. the USER_CREDS_PREFER_NSS flag has no effect in this case). Why?
+ * Simply because there are valid use cases where the user might change the home directory or the shell
+ * of the relevant users, but changing the UID/GID mappings for them is something we explicitly don't
+ * support. */
+
+ r = synthesize_user_creds(username, uid, gid, home, shell, flags);
+ if (r >= 0)
+ return 0;
+ if (r != -ENOMEDIUM) /* not a username we can synthesize */
+ return r;
+ }
+
+ if (parse_uid(*username, &u) >= 0) {
+ errno = 0;
+ p = getpwuid(u);
+
+ /* If there are multiple users with the same id, make sure to leave $USER to the configured value
+ * instead of the first occurrence in the database. However if the uid was configured by a numeric uid,
+ * then let's pick the real username from /etc/passwd. */
+ if (p)
+ *username = p->pw_name;
+ else if (FLAGS_SET(flags, USER_CREDS_ALLOW_MISSING) && !gid && !home && !shell) {
+
+ /* If the specified user is a numeric UID and it isn't in the user database, and the caller
+ * passed USER_CREDS_ALLOW_MISSING and was only interested in the UID, then just return that
+ * and don't complain. */
+
+ if (uid)
+ *uid = u;
+
+ return 0;
+ }
+ } else {
+ errno = 0;
+ p = getpwnam(*username);
+ }
+ if (!p) {
+ r = errno > 0 ? -errno : -ESRCH;
+
+ /* If the user requested that we only synthesize as fallback, do so now */
+ if (FLAGS_SET(flags, USER_CREDS_PREFER_NSS)) {
+ if (synthesize_user_creds(username, uid, gid, home, shell, flags) >= 0)
+ return 0;
+ }
+
+ return r;
+ }
+
+ if (uid) {
+ if (!uid_is_valid(p->pw_uid))
+ return -EBADMSG;
+
+ *uid = p->pw_uid;
+ }
+
+ if (gid) {
+ if (!gid_is_valid(p->pw_gid))
+ return -EBADMSG;
+
+ *gid = p->pw_gid;
+ }
+
+ if (home) {
+ if (FLAGS_SET(flags, USER_CREDS_CLEAN) && empty_or_root(p->pw_dir))
+ *home = NULL;
+ else
+ *home = p->pw_dir;
+ }
+
+ if (shell) {
+ if (FLAGS_SET(flags, USER_CREDS_CLEAN) && (isempty(p->pw_shell) || is_nologin_shell(p->pw_shell)))
+ *shell = NULL;
+ else
+ *shell = p->pw_shell;
+ }
+
+ return 0;
+}
+
+int get_group_creds(const char **groupname, gid_t *gid, UserCredsFlags flags) {
+        struct group *g;
+        gid_t id;
+
+        /* Resolves the group given in *groupname (a group name or a numeric GID string) and stores its
+         * GID in *gid, if non-NULL. *groupname may be changed to point to the name found, which is
+         * either a string constant or points into the struct group returned by getgrgid()/getgrnam().
+         * Returns 0 on success, a negative errno-style value on failure (-ESRCH if no record was found
+         * and errno was not set, -EBADMSG if the record carries an invalid GID). */
+
+        assert(groupname);
+        assert(*groupname); /* same contract as get_user_creds(); the string is compared right below */
+
+        /* We enforce some special rules for gid=0: in order to avoid NSS lookups for root we hardcode its data. */
+
+        if (STR_IN_SET(*groupname, "root", "0")) {
+                *groupname = "root";
+
+                if (gid)
+                        *gid = 0;
+
+                return 0;
+        }
+
+        if (synthesize_nobody() &&
+            STR_IN_SET(*groupname, NOBODY_GROUP_NAME, "65534")) {
+                *groupname = NOBODY_GROUP_NAME;
+
+                if (gid)
+                        *gid = GID_NOBODY;
+
+                return 0;
+        }
+
+        if (parse_gid(*groupname, &id) >= 0) {
+                errno = 0;
+                g = getgrgid(id);
+
+                if (g)
+                        *groupname = g->gr_name;
+                else if (FLAGS_SET(flags, USER_CREDS_ALLOW_MISSING)) {
+                        /* A numeric GID without a database record is fine if the caller said so */
+                        if (gid)
+                                *gid = id;
+
+                        return 0;
+                }
+        } else {
+                errno = 0;
+                g = getgrnam(*groupname);
+        }
+
+        if (!g)
+                return errno > 0 ? -errno : -ESRCH;
+
+        if (gid) {
+                if (!gid_is_valid(g->gr_gid))
+                        return -EBADMSG;
+
+                *gid = g->gr_gid;
+        }
+
+        return 0;
+}
+
+char* uid_to_name(uid_t uid) {
+        char *ret;
+        int r;
+
+        /* Returns a newly allocated string with the user name for uid, or with the UID formatted as
+         * a number if no name can be determined. Returns NULL on allocation failure. */
+
+        /* Shortcut things to avoid NSS lookups */
+        if (uid == 0)
+                return strdup("root");
+        if (synthesize_nobody() &&
+            uid == UID_NOBODY)
+                return strdup(NOBODY_USER_NAME);
+
+        if (uid_is_valid(uid)) {
+                long bufsize;
+
+                bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
+                if (bufsize <= 0)
+                        bufsize = 4096;
+
+                /* Retry with a doubled buffer for as long as getpwuid_r() reports it as too small */
+                for (;;) {
+                        struct passwd pwbuf, *pw = NULL;
+                        _cleanup_free_ char *buf = NULL;
+
+                        buf = malloc(bufsize);
+                        if (!buf)
+                                return NULL;
+
+                        r = getpwuid_r(uid, &pwbuf, buf, (size_t) bufsize, &pw);
+                        if (r == 0 && pw)
+                                return strdup(pw->pw_name);
+                        if (r != ERANGE)
+                                break;
+
+                        if (bufsize > LONG_MAX/2) /* doubling would overflow the signed size */
+                                return NULL;
+
+                        bufsize *= 2;
+                }
+        }
+
+        if (asprintf(&ret, UID_FMT, uid) < 0)
+                return NULL;
+
+        return ret;
+}
+
+char* gid_to_name(gid_t gid) {
+        char *ret;
+        int r;
+
+        /* Returns a newly allocated string with the group name for gid, or with the GID formatted as
+         * a number if no name can be determined. Returns NULL on allocation failure. */
+
+        /* Shortcut things to avoid NSS lookups */
+        if (gid == 0)
+                return strdup("root");
+        if (synthesize_nobody() &&
+            gid == GID_NOBODY)
+                return strdup(NOBODY_GROUP_NAME);
+
+        if (gid_is_valid(gid)) {
+                long bufsize;
+
+                bufsize = sysconf(_SC_GETGR_R_SIZE_MAX);
+                if (bufsize <= 0)
+                        bufsize = 4096;
+
+                /* Retry with a doubled buffer for as long as getgrgid_r() reports it as too small */
+                for (;;) {
+                        struct group grbuf, *gr = NULL;
+                        _cleanup_free_ char *buf = NULL;
+
+                        buf = malloc(bufsize);
+                        if (!buf)
+                                return NULL;
+
+                        r = getgrgid_r(gid, &grbuf, buf, (size_t) bufsize, &gr);
+                        if (r == 0 && gr)
+                                return strdup(gr->gr_name);
+                        if (r != ERANGE)
+                                break;
+
+                        if (bufsize > LONG_MAX/2) /* doubling would overflow the signed size */
+                                return NULL;
+
+                        bufsize *= 2;
+                }
+        }
+
+        if (asprintf(&ret, GID_FMT, gid) < 0)
+                return NULL;
+
+        return ret;
+}
+
+int in_gid(gid_t gid) {
+        _cleanup_free_ gid_t *gids = NULL;
+        int n, i;
+
+        /* Checks whether the calling process is a member of the group gid, i.e. whether gid is its
+         * real, effective or one of its supplementary GIDs. Returns 1 if so, 0 if not, -EINVAL if gid
+         * is not a valid GID, or another negative errno-style value on failure. */
+
+        if (getgid() == gid)
+                return 1;
+
+        if (getegid() == gid)
+                return 1;
+
+        if (!gid_is_valid(gid))
+                return -EINVAL;
+
+        /* Ask the kernel how many supplementary groups there are and allocate the list on the heap,
+         * instead of placing an array sized for the system-wide maximum on the stack. */
+        for (;;) {
+                int want;
+
+                want = getgroups(0, NULL);
+                if (want < 0)
+                        return -errno;
+                if (want == 0)
+                        return 0;
+
+                free(gids);
+                gids = new(gid_t, want);
+                if (!gids)
+                        return -ENOMEM;
+
+                n = getgroups(want, gids);
+                if (n >= 0)
+                        break;
+
+                /* EINVAL means the list grew between the two calls: start over */
+                if (errno != EINVAL)
+                        return -errno;
+        }
+
+        for (i = 0; i < n; i++)
+                if (gids[i] == gid)
+                        return 1;
+
+        return 0;
+}
+
+int in_group(const char *name) {
+        gid_t gid;
+        int r;
+
+        /* Like in_gid(), but takes a group name (or numeric GID string) that is resolved first */
+        r = get_group_creds(&name, &gid, 0);
+
+        return r < 0 ? r : in_gid(gid);
+}
+
+int get_home_dir(char **_h) {
+        const char *dir;
+        char *h;
+
+        /* Determines the home directory of the calling user and stores a newly allocated copy in *_h.
+         * Returns 0 on success, a negative errno-style value on failure. */
+
+        assert(_h);
+
+        /* Take the user specified one */
+        dir = secure_getenv("HOME");
+        if (!dir || !path_is_absolute(dir)) {
+                uid_t u;
+
+                /* Hardcode home directory for root and nobody to avoid NSS */
+                u = getuid();
+                if (u == 0)
+                        dir = "/root";
+                else if (synthesize_nobody() &&
+                         u == UID_NOBODY)
+                        dir = "/";
+                else {
+                        struct passwd *p;
+
+                        /* Check the database... */
+                        errno = 0;
+                        p = getpwuid(u);
+                        if (!p)
+                                return errno > 0 ? -errno : -ESRCH;
+
+                        if (!path_is_absolute(p->pw_dir))
+                                return -EINVAL;
+
+                        dir = p->pw_dir;
+                }
+        }
+
+        h = strdup(dir);
+        if (!h)
+                return -ENOMEM;
+
+        *_h = h;
+        return 0;
+}
+
+int get_shell(char **_s) {
+        struct passwd *p;
+        const char *e;
+        char *s;
+        uid_t u;
+
+        /* Determines the shell of the calling user and stores a newly allocated copy in *_s. Returns
+         * 0 on success, a negative errno-style value on failure. */
+
+        assert(_s);
+
+        /* Take the user specified one. Like get_home_dir() does for $HOME, use secure_getenv() and
+         * insist on an absolute path, which is also what we demand from the database entry below. */
+        e = secure_getenv("SHELL");
+        if (e && path_is_absolute(e)) {
+                s = strdup(e);
+                if (!s)
+                        return -ENOMEM;
+
+                *_s = s;
+                return 0;
+        }
+
+        /* Hardcode shell for root and nobody to avoid NSS */
+        u = getuid();
+        if (u == 0) {
+                s = strdup("/bin/sh");
+                if (!s)
+                        return -ENOMEM;
+
+                *_s = s;
+                return 0;
+        }
+        if (synthesize_nobody() &&
+            u == UID_NOBODY) {
+                s = strdup("/sbin/nologin");
+                if (!s)
+                        return -ENOMEM;
+
+                *_s = s;
+                return 0;
+        }
+
+        /* Check the database... */
+        errno = 0;
+        p = getpwuid(u);
+        if (!p)
+                return errno > 0 ? -errno : -ESRCH;
+
+        if (!path_is_absolute(p->pw_shell))
+                return -EINVAL;
+
+        s = strdup(p->pw_shell);
+        if (!s)
+                return -ENOMEM;
+
+        *_s = s;
+        return 0;
+}
+
+int reset_uid_gid(void) {
+        int r;
+
+        /* Drops all auxiliary groups (where permitted) and switches all GIDs and UIDs to 0 */
+
+        r = maybe_setgroups(0, NULL);
+        if (r < 0)
+                return r;
+
+        if (setresgid(0, 0, 0) < 0 ||
+            setresuid(0, 0, 0) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int take_etc_passwd_lock(const char *root) {
+
+        struct flock flock = {
+                .l_type = F_WRLCK,
+                .l_whence = SEEK_SET,
+                .l_start = 0,
+                .l_len = 0,
+        };
+
+        const char *path = ETC_PASSWD_LOCK_PATH;
+        int fd;
+
+        /* This is roughly the same as lckpwdf(), but not as awful. We don't want to use alarm() and
+         * signals, hence we implement our own trivial version of this.
+         *
+         * Note that shadow-utils also takes per-database locks in addition to lckpwdf(). However, we
+         * don't, given that they are redundant as they invoke lckpwdf() first and keep it during
+         * everything they do. The per-database locks are awfully racy, and thus we just won't do them.
+         *
+         * Returns the file descriptor holding the lock on success, or the return value of
+         * log_debug_errno() on failure. */
+
+        if (root)
+                path = prefix_roota(root, ETC_PASSWD_LOCK_PATH);
+
+        fd = open(path, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0600);
+        if (fd < 0)
+                return log_debug_errno(errno, "Cannot open %s: %m", path);
+
+        /* Blocks until the write lock on the whole file is ours */
+        if (fcntl(fd, F_SETLKW, &flock) >= 0)
+                return fd;
+
+        safe_close(fd);
+        return log_debug_errno(errno, "Locking %s failed: %m", path);
+}
+
+bool valid_user_group_name(const char *u) {
+        const char *c;
+        size_t len;
+        long sz;
+
+        /* Checks if the specified name is a valid user/group name. Also see POSIX IEEE Std 1003.1-2008, 2016 Edition,
+         * 3.437. We are a bit stricter here however. Specifically we deviate from POSIX rules:
+         *
+         * - We don't allow any dots (this would break chown syntax which permits dots as user/group name separator)
+         * - We require that names fit into the appropriate utmp field
+         * - We don't allow empty user names
+         *
+         * Note that other systems are even more restrictive, and don't permit underscores or uppercase characters.
+         */
+
+        if (isempty(u))
+                return false;
+
+        /* First character: a letter or an underscore */
+        if (!(u[0] >= 'a' && u[0] <= 'z') &&
+            !(u[0] >= 'A' && u[0] <= 'Z') &&
+            u[0] != '_')
+                return false;
+
+        /* Remaining characters: letters, digits, underscore or dash */
+        for (c = u+1; *c; c++) {
+                bool ok =
+                        (*c >= 'a' && *c <= 'z') ||
+                        (*c >= 'A' && *c <= 'Z') ||
+                        (*c >= '0' && *c <= '9') ||
+                        *c == '_' || *c == '-';
+
+                if (!ok)
+                        return false;
+        }
+
+        len = (size_t) (c - u);
+
+        sz = sysconf(_SC_LOGIN_NAME_MAX);
+        assert_se(sz > 0);
+
+        return len <= (size_t) sz &&
+                len <= UT_NAMESIZE - 1;
+}
+
+bool valid_user_group_name_or_id(const char *u) {
+
+        /* Like valid_user_group_name(), but is also fine with numeric UID/GID specifications, as long
+         * as they are in the right range, and not the invalid user ids. */
+
+        return !isempty(u) &&
+                (valid_user_group_name(u) || parse_uid(u, NULL) >= 0);
+}
+
+bool valid_gecos(const char *d) {
+
+        /* A GECOS field has to be valid UTF-8, free of control characters and free of colons, since
+         * colons are used as field separators. */
+
+        return d &&
+                utf8_is_valid(d) &&
+                !string_has_cc(d, NULL) &&
+                !strchr(d, ':');
+}
+
+bool valid_home(const char *p) {
+
+        /* A home directory has to be a non-empty, absolute, normalized path that is valid UTF-8 and
+         * free of control characters and of colons, since colons are used as field separators.
+         *
+         * Note that this function is also called by valid_shell(), any changes must account for
+         * that. */
+
+        return !isempty(p) &&
+                utf8_is_valid(p) &&
+                !string_has_cc(p, NULL) &&
+                path_is_absolute(p) &&
+                path_is_normalized(p) &&
+                !strchr(p, ':');
+}
+
+int maybe_setgroups(size_t size, const gid_t *list) {
+        int r;
+
+        /* Wrapper around setgroups() that turns dropping all auxiliary groups into a NOP if
+         * /proc/self/setgroups says anything but "allow". Returns 0 on success (or if skipped), a
+         * negative errno-style value on failure. */
+
+        if (size == 0) { /* Dropping all aux groups? */
+                _cleanup_free_ char *policy = NULL;
+
+                r = read_one_line_file("/proc/self/setgroups", &policy);
+
+                /* Old kernels don't have /proc/self/setgroups, so assume we can use setgroups on
+                 * -ENOENT. Any other read error is fatal. */
+                if (r < 0 && r != -ENOENT)
+                        return r;
+
+                if (r >= 0 && !streq(policy, "allow")) {
+                        log_debug("Skipping setgroups(), /proc/self/setgroups is set to 'deny'");
+                        return 0;
+                }
+        }
+
+        if (setgroups(size, list) < 0)
+                return -errno;
+
+        return 0;
+}
+
+bool synthesize_nobody(void) {
+        /* Returns true when we shall synthesize the "nobody" user (which we do by default). This can be turned off by
+         * touching /etc/systemd/dont-synthesize-nobody in order to provide upgrade compatibility with legacy systems
+         * that used the "nobody" user name and group name for other UIDs/GIDs than 65534.
+         *
+         * Note that we do not employ any kind of synchronization on the following caching variable. If the variable is
+         * accessed in multi-threaded programs in the worst case it might happen that we initialize twice, but that
+         * shouldn't matter as each initialization should come to the same result. */
+        static int cache = -1;
+
+        if (cache >= 0)
+                return cache;
+
+        /* First call: synthesize unless the opt-out file can be accessed */
+        cache = access("/etc/systemd/dont-synthesize-nobody", F_OK) < 0;
+
+        return cache;
+}
+
+int putpwent_sane(const struct passwd *pw, FILE *stream) {
+        /* putpwent() with errno-style return values: 0 on success, -errno (or -EIO if errno was left
+         * unset) on failure. */
+
+        assert(pw);
+        assert(stream);
+
+        errno = 0;
+        if (putpwent(pw, stream) == 0)
+                return 0;
+
+        return errno > 0 ? -errno : -EIO;
+}
+
+int putspent_sane(const struct spwd *sp, FILE *stream) {
+        /* putspent() with errno-style return values: 0 on success, -errno (or -EIO if errno was left
+         * unset) on failure. */
+
+        assert(sp);
+        assert(stream);
+
+        errno = 0;
+        if (putspent(sp, stream) == 0)
+                return 0;
+
+        return errno > 0 ? -errno : -EIO;
+}
+
+int putgrent_sane(const struct group *gr, FILE *stream) {
+        /* putgrent() with errno-style return values: 0 on success, -errno (or -EIO if errno was left
+         * unset) on failure. */
+
+        assert(gr);
+        assert(stream);
+
+        errno = 0;
+        if (putgrent(gr, stream) == 0)
+                return 0;
+
+        return errno > 0 ? -errno : -EIO;
+}
+
+#if ENABLE_GSHADOW
+int putsgent_sane(const struct sgrp *sg, FILE *stream) {
+        /* putsgent() with errno-style return values: 0 on success, -errno (or -EIO if errno was left
+         * unset) on failure. */
+
+        assert(sg);
+        assert(stream);
+
+        errno = 0;
+        if (putsgent(sg, stream) == 0)
+                return 0;
+
+        return errno > 0 ? -errno : -EIO;
+}
+#endif
+
+int fgetpwent_sane(FILE *stream, struct passwd **pw) {
+        struct passwd *entry;
+
+        /* fgetpwent() with errno-style return values: 1 and the entry in *pw if one was read, 0 and
+         * NULL if there was none (also when errno is ENOENT), negative errno-style value on failure. */
+
+        assert(pw);
+        assert(stream);
+
+        errno = 0;
+        entry = fgetpwent(stream);
+        if (entry) {
+                *pw = entry;
+                return 1;
+        }
+
+        if (errno != ENOENT)
+                return errno > 0 ? -errno : -EIO;
+
+        *pw = NULL;
+        return 0;
+}
+
+int fgetspent_sane(FILE *stream, struct spwd **sp) {
+        struct spwd *entry;
+
+        /* fgetspent() with errno-style return values: 1 and the entry in *sp if one was read, 0 and
+         * NULL if there was none (also when errno is ENOENT), negative errno-style value on failure. */
+
+        assert(sp);
+        assert(stream);
+
+        errno = 0;
+        entry = fgetspent(stream);
+        if (entry) {
+                *sp = entry;
+                return 1;
+        }
+
+        if (errno != ENOENT)
+                return errno > 0 ? -errno : -EIO;
+
+        *sp = NULL;
+        return 0;
+}
+
+int fgetgrent_sane(FILE *stream, struct group **gr) {
+        struct group *entry;
+
+        /* fgetgrent() with errno-style return values: 1 and the entry in *gr if one was read, 0 and
+         * NULL if there was none (also when errno is ENOENT), negative errno-style value on failure. */
+
+        assert(gr);
+        assert(stream);
+
+        errno = 0;
+        entry = fgetgrent(stream);
+        if (entry) {
+                *gr = entry;
+                return 1;
+        }
+
+        if (errno != ENOENT)
+                return errno > 0 ? -errno : -EIO;
+
+        *gr = NULL;
+        return 0;
+}
+
+#if ENABLE_GSHADOW
+int fgetsgent_sane(FILE *stream, struct sgrp **sg) {
+        struct sgrp *entry;
+
+        /* fgetsgent() with errno-style return values: 1 and the entry in *sg if one was read, 0 and
+         * NULL if there was none (also when errno is ENOENT), negative errno-style value on failure. */
+
+        assert(sg);
+        assert(stream);
+
+        errno = 0;
+        entry = fgetsgent(stream);
+        if (entry) {
+                *sg = entry;
+                return 1;
+        }
+
+        if (errno != ENOENT)
+                return errno > 0 ? -errno : -EIO;
+
+        *sg = NULL;
+        return 0;
+}
+#endif
diff --git a/src/basic/user-util.h b/src/basic/user-util.h
new file mode 100644
index 0000000..cc899ee
--- /dev/null
+++ b/src/basic/user-util.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <grp.h>
+#if ENABLE_GSHADOW
+#include <gshadow.h>
+#endif
+#include <pwd.h>
+#include <shadow.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+bool uid_is_valid(uid_t uid);
+
+/* GIDs are validated with the same rules as UIDs, so simply defer to uid_is_valid(). */
+static inline bool gid_is_valid(gid_t gid) {
+        uid_t as_uid = (uid_t) gid;
+
+        return uid_is_valid(as_uid);
+}
+
+int parse_uid(const char *s, uid_t* ret_uid);
+
+/* Parses a GID string by reusing parse_uid(); the result is written through ret_gid reinterpreted as a
+ * uid_t pointer, and parse_uid()'s return value is passed through. */
+static inline int parse_gid(const char *s, gid_t *ret_gid) {
+        uid_t *target = (uid_t*) ret_gid;
+
+        return parse_uid(s, target);
+}
+
+char* getlogname_malloc(void);
+char* getusername_malloc(void);
+
+/* Bit flags accepted by get_user_creds() and get_group_creds() (see the prototypes below); each flag
+ * occupies its own bit so they can be OR-ed together. */
+typedef enum UserCredsFlags {
+ USER_CREDS_PREFER_NSS = 1 << 0, /* if set, only synthesize user records if database lacks them. Normally we bypass the userdb entirely for the records we can synthesize */
+ USER_CREDS_ALLOW_MISSING = 1 << 1, /* if a numeric UID string is resolved, be OK if there's no record for it */
+ USER_CREDS_CLEAN = 1 << 2, /* try to clean up shell and home fields with invalid data */
+} UserCredsFlags;
+
+int get_user_creds(const char **username, uid_t *uid, gid_t *gid, const char **home, const char **shell, UserCredsFlags flags);
+int get_group_creds(const char **groupname, gid_t *gid, UserCredsFlags flags);
+
+char* uid_to_name(uid_t uid);
+char* gid_to_name(gid_t gid);
+
+int in_gid(gid_t gid);
+int in_group(const char *name);
+
+int get_home_dir(char **ret);
+int get_shell(char **_ret);
+
+int reset_uid_gid(void);
+
+int take_etc_passwd_lock(const char *root);
+
+#define UID_INVALID ((uid_t) -1)
+#define GID_INVALID ((gid_t) -1)
+
+#define UID_NOBODY ((uid_t) 65534U)
+#define GID_NOBODY ((gid_t) 65534U)
+
+#define ETC_PASSWD_LOCK_PATH "/etc/.pwd.lock"
+
+/* True if the UID lies within the inclusive range DYNAMIC_UID_MIN..DYNAMIC_UID_MAX. */
+static inline bool uid_is_dynamic(uid_t uid) {
+        if (uid < DYNAMIC_UID_MIN)
+                return false;
+
+        return uid <= DYNAMIC_UID_MAX;
+}
+
+/* GIDs use the same dynamic range check as UIDs; defer to uid_is_dynamic(). */
+static inline bool gid_is_dynamic(gid_t gid) {
+        uid_t as_uid = (uid_t) gid;
+
+        return uid_is_dynamic(as_uid);
+}
+
+/* True if the UID does not exceed SYSTEM_UID_MAX. */
+static inline bool uid_is_system(uid_t uid) {
+        return !(uid > SYSTEM_UID_MAX);
+}
+
+/* True if the GID does not exceed SYSTEM_GID_MAX. */
+static inline bool gid_is_system(gid_t gid) {
+        return !(gid > SYSTEM_GID_MAX);
+}
+
+/* The following macros add 1 when converting things, since UID 0 is a valid UID, while the pointer
+ * NULL is special */
+#define PTR_TO_UID(p) ((uid_t) (((uintptr_t) (p))-1))
+#define UID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1))
+
+#define PTR_TO_GID(p) ((gid_t) (((uintptr_t) (p))-1))
+#define GID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1))
+
+/* Reports whether /proc/self/uid_map is accessible, which is used here as the indicator for user
+ * namespace support. */
+static inline bool userns_supported(void) {
+        if (access("/proc/self/uid_map", F_OK) < 0)
+                return false;
+
+        return true;
+}
+
+bool valid_user_group_name(const char *u);
+bool valid_user_group_name_or_id(const char *u);
+bool valid_gecos(const char *d);
+bool valid_home(const char *p);
+
+/* Shell paths are held to the same requirements as home directories, so this simply forwards to
+ * valid_home(). /etc/shells is deliberately not consulted: it only matters for regular (non-system)
+ * users and does not fit the idea of an empty /etc. */
+static inline bool valid_shell(const char *p) {
+        bool acceptable;
+
+        acceptable = valid_home(p);
+        return acceptable;
+}
+
+int maybe_setgroups(size_t size, const gid_t *list);
+
+bool synthesize_nobody(void);
+
+int fgetpwent_sane(FILE *stream, struct passwd **pw);
+int fgetspent_sane(FILE *stream, struct spwd **sp);
+int fgetgrent_sane(FILE *stream, struct group **gr);
+int putpwent_sane(const struct passwd *pw, FILE *stream);
+int putspent_sane(const struct spwd *sp, FILE *stream);
+int putgrent_sane(const struct group *gr, FILE *stream);
+#if ENABLE_GSHADOW
+int fgetsgent_sane(FILE *stream, struct sgrp **sg);
+int putsgent_sane(const struct sgrp *sg, FILE *stream);
+#endif
diff --git a/src/basic/utf8.c b/src/basic/utf8.c
new file mode 100644
index 0000000..e0d1949
--- /dev/null
+++ b/src/basic/utf8.c
@@ -0,0 +1,532 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+/* Parts of this file are based on the GLIB utf8 validation functions. The
+ * original license text follows. */
+
+/* gutf8.c - Operations on UTF-8 strings.
+ *
+ * Copyright (C) 1999 Tom Tromey
+ * Copyright (C) 2000 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "gunicode.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "utf8.h"
+
+/* Checks whether a code point is acceptable: it must be below 0x110000, must not lie in the UTF-16
+ * surrogate range 0xD800..0xDFFF, must not lie in 0xFDD0..0xFDEF, and its low 16 bits must not be
+ * 0xFFFE or 0xFFFF. */
+bool unichar_is_valid(char32_t ch) {
+        bool beyond_unicode, surrogate, reserved, low_bits_fffe;
+
+        beyond_unicode = ch >= 0x110000;
+        surrogate = (ch & 0xFFFFF800) == 0xD800;
+        reserved = ch >= 0xFDD0 && ch <= 0xFDEF;
+        low_bits_fffe = (ch & 0xFFFE) == 0xFFFE;
+
+        return !(beyond_unicode || surrogate || reserved || low_bits_fffe);
+}
+
+/* Classifies control characters: everything below ' ' (the C0 range) except '\t' and '\n', plus DEL
+ * (0x7F) and the C1 range up to 0x9F. Tab and newline are in C0 but are exempted here. */
+static bool unichar_is_control(char32_t ch) {
+
+        if (0x7F <= ch && ch <= 0x9F)
+                return true;
+
+        if (ch >= ' ')
+                return false;
+
+        return !IN_SET(ch, '\t', '\n');
+}
+
+/* Number of bytes a UTF-8 sequence is expected to occupy, judged by its first byte only: 1..6 for a
+ * recognized lead byte pattern, 0 if the byte cannot start a sequence. */
+static size_t utf8_encoded_expected_len(const char *str) {
+        static const struct {
+                uint8_t mask;
+                uint8_t lead;
+        } patterns[] = {
+                { 0x80, 0x00 },
+                { 0xe0, 0xc0 },
+                { 0xf0, 0xe0 },
+                { 0xf8, 0xf0 },
+                { 0xfc, 0xf8 },
+                { 0xfe, 0xfc },
+        };
+        uint8_t first;
+        size_t k;
+
+        assert(str);
+
+        first = (uint8_t) str[0];
+
+        /* Entry k describes the lead byte of a (k + 1)-byte sequence */
+        for (k = 0; k < sizeof(patterns) / sizeof(patterns[0]); k++)
+                if ((first & patterns[k].mask) == patterns[k].lead)
+                        return k + 1;
+
+        return 0;
+}
+
+/* Decodes the single UTF-8 sequence at str into *ret_unichar. Returns 0 on success, or -EINVAL if the
+ * first byte is not a valid lead byte or a following byte is not a continuation byte (10xxxxxx). No
+ * range or overlong checks are done here. */
+int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar) {
+        /* Payload bits carried by the lead byte, indexed by sequence length */
+        static const uint8_t lead_payload[] = { 0, 0, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
+        char32_t value;
+        size_t n, k;
+
+        assert(str);
+
+        n = utf8_encoded_expected_len(str);
+        if (n == 0 || n > 6)
+                return -EINVAL;
+
+        if (n == 1) {
+                *ret_unichar = (char32_t)str[0];
+                return 0;
+        }
+
+        value = (char32_t)str[0] & lead_payload[n];
+
+        for (k = 1; k < n; k++) {
+                char32_t byte = (char32_t)str[k];
+
+                if ((byte & 0xc0) != 0x80)
+                        return -EINVAL;
+
+                value = (value << 6) | (byte & 0x3f);
+        }
+
+        *ret_unichar = value;
+
+        return 0;
+}
+
+/* Checks whether the first 'length' bytes of str are valid UTF-8 made up only of non-control
+ * characters. '\n' is accepted only if 'newline' is true. Returns false on the first invalid, truncated
+ * or control sequence, true otherwise (including for length == 0).
+ *
+ * str is not required to be NUL-terminated, so the expected sequence length is derived from the lead
+ * byte and compared against the remaining length *before* the sequence is validated. Otherwise a
+ * multi-byte sequence cut off at the end of the buffer would make the validator read past 'length'. */
+bool utf8_is_printable_newline(const char* str, size_t length, bool newline) {
+        const char *p;
+
+        assert(str);
+
+        for (p = str; length;) {
+                size_t expected_len;
+                int encoded_len, r;
+                char32_t val;
+
+                /* Only looks at p[0], which is within bounds since length > 0 */
+                expected_len = utf8_encoded_expected_len(p);
+                if (expected_len == 0 ||
+                    expected_len > length)
+                        return false;
+
+                encoded_len = utf8_encoded_valid_unichar(p);
+                if (encoded_len < 0 ||
+                    (size_t) encoded_len > length)
+                        return false;
+
+                r = utf8_encoded_to_unichar(p, &val);
+                if (r < 0 ||
+                    unichar_is_control(val) ||
+                    (!newline && val == '\n'))
+                        return false;
+
+                length -= encoded_len;
+                p += encoded_len;
+        }
+
+        return true;
+}
+
+/* Returns str itself if the whole NUL-terminated string validates as UTF-8 according to
+ * utf8_encoded_valid_unichar(), or NULL at the first invalid sequence. */
+char *utf8_is_valid(const char *str) {
+        const char *cursor;
+        int n;
+
+        assert(str);
+
+        for (cursor = str; *cursor; cursor += n) {
+                n = utf8_encoded_valid_unichar(cursor);
+                if (n < 0)
+                        return NULL;
+        }
+
+        return (char*) str;
+}
+
+/* Returns a newly allocated copy of str in which every byte that does not start a valid UTF-8 sequence
+ * is replaced by UTF8_REPLACEMENT_CHARACTER. Returns NULL if the allocation fails. The buffer is sized
+ * at 4 bytes per input byte plus the terminating NUL. */
+char *utf8_escape_invalid(const char *str) {
+        char *buf, *out;
+
+        assert(str);
+
+        buf = malloc(strlen(str) * 4 + 1);
+        if (!buf)
+                return NULL;
+
+        out = buf;
+        while (*str) {
+                int n;
+
+                n = utf8_encoded_valid_unichar(str);
+                if (n <= 0) {
+                        /* Invalid byte: substitute and resynchronize on the next byte */
+                        out = stpcpy(out, UTF8_REPLACEMENT_CHARACTER);
+                        str += 1;
+                        continue;
+                }
+
+                out = mempcpy(out, str, n);
+                str += n;
+        }
+
+        *out = '\0';
+
+        return buf;
+}
+
+/* Returns a newly allocated copy of str in which valid but non-printable sequences are written as
+ * "\xHH" for each of their bytes, and bytes that do not start a valid UTF-8 sequence are replaced by
+ * UTF8_REPLACEMENT_CHARACTER. Printable sequences are copied verbatim. Returns NULL if the allocation
+ * fails. */
+char *utf8_escape_non_printable(const char *str) {
+        char *buf, *out;
+
+        assert(str);
+
+        /* Worst case is 4 output bytes ("\xHH") per input byte */
+        buf = malloc(strlen(str) * 4 + 1);
+        if (!buf)
+                return NULL;
+
+        out = buf;
+        while (*str) {
+                int n;
+
+                n = utf8_encoded_valid_unichar(str);
+                if (n <= 0) {
+                        out = stpcpy(out, UTF8_REPLACEMENT_CHARACTER);
+                        str += 1;
+                        continue;
+                }
+
+                if (utf8_is_printable(str, n)) {
+                        out = mempcpy(out, str, n);
+                        str += n;
+                        continue;
+                }
+
+                for (; n > 0; n--, str++) {
+                        *(out++) = '\\';
+                        *(out++) = 'x';
+                        *(out++) = hexchar((int) *str >> 4);
+                        *(out++) = hexchar((int) *str);
+                }
+        }
+
+        *out = '\0';
+
+        return buf;
+}
+
+/* Returns str if every byte of the NUL-terminated string is plain ASCII (0..127), NULL otherwise. */
+char *ascii_is_valid(const char *str) {
+        const char *cursor;
+
+        assert(str);
+
+        cursor = str;
+        while (*cursor) {
+                if ((unsigned char) *cursor >= 128)
+                        return NULL;
+
+                cursor++;
+        }
+
+        return (char*) str;
+}
+
+/* Like ascii_is_valid(), but inspects exactly len bytes and additionally rejects NUL bytes within that
+ * range. Returns str on success, NULL otherwise. */
+char *ascii_is_valid_n(const char *str, size_t len) {
+        const char *cursor, *end;
+
+        assert(str);
+
+        end = str + len;
+        for (cursor = str; cursor < end; cursor++) {
+                unsigned char byte = (unsigned char) *cursor;
+
+                if (byte == 0 || byte >= 128)
+                        return NULL;
+        }
+
+        return (char*) str;
+}
+
+/**
+ * utf8_encode_unichar() - Encode single UCS-4 character as UTF-8
+ * @out_utf8: output buffer of at least 4 bytes or NULL
+ * @g: UCS-4 character to encode
+ *
+ * Writes the UTF-8 encoding of @g into @out_utf8 without adding a terminating
+ * NUL. If @out_utf8 is NULL nothing is written and only the length is
+ * computed.
+ *
+ * Returns: The number of bytes (1..4) the UTF-8 representation does or would
+ * occupy, or 0 if @g is 0x200000 or larger.
+ */
+size_t utf8_encode_unichar(char *out_utf8, char32_t g) {
+        /* Marker bits and payload mask of the lead byte, indexed by sequence length */
+        static const unsigned char lead_marker[] = { 0, 0, 0xc0, 0xe0, 0xf0 };
+        static const unsigned char lead_payload[] = { 0, 0, 0x1f, 0x0f, 0x07 };
+        size_t n, k;
+
+        if (g < 0x80)
+                n = 1;
+        else if (g < 0x800)
+                n = 2;
+        else if (g < 0x10000)
+                n = 3;
+        else if (g < 0x200000)
+                n = 4;
+        else
+                return 0;
+
+        if (!out_utf8)
+                return n;
+
+        if (n == 1) {
+                out_utf8[0] = g & 0x7f;
+                return 1;
+        }
+
+        /* Fill the continuation bytes back to front, consuming six bits each */
+        for (k = n - 1; k > 0; k--) {
+                out_utf8[k] = 0x80 | (g & 0x3f);
+                g >>= 6;
+        }
+
+        out_utf8[0] = lead_marker[n] | (g & lead_payload[n]);
+
+        return n;
+}
+
+/* Converts a UTF-16 buffer of 'length' bytes (not code units) into a newly allocated, NUL-terminated
+ * UTF-8 string. Code units are assembled low byte first (f[0] is the low byte, f[1] the high byte).
+ * Unpaired surrogates are dropped silently. Returns NULL if s cannot be sized without overflow or if
+ * the allocation fails. */
+char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */) {
+ const uint8_t *f;
+ char *r, *t;
+
+ assert(s);
+
+ /* Input length is in bytes, i.e. the shortest possible character takes 2 bytes. Each unicode character may
+ * take up to 4 bytes in UTF-8. Let's also account for a trailing NUL byte. */
+ if (length * 2 < length)
+ return NULL; /* overflow */
+
+ /* Output buffer: two bytes per input byte, plus the trailing NUL */
+ r = new(char, length * 2 + 1);
+ if (!r)
+ return NULL;
+
+ f = (const uint8_t*) s;
+ t = r;
+
+ /* Loop while a complete 2-byte code unit is left; a trailing odd byte is ignored */
+ while (f + 1 < (const uint8_t*) s + length) {
+ char16_t w1, w2;
+
+ /* see RFC 2781 section 2.2 */
+
+ w1 = f[1] << 8 | f[0];
+ f += 2;
+
+ if (!utf16_is_surrogate(w1)) {
+ t += utf8_encode_unichar(t, w1);
+ continue;
+ }
+
+ if (utf16_is_trailing_surrogate(w1))
+ continue; /* spurious trailing surrogate, ignore */
+
+ /* w1 is a leading surrogate: stop if no complete second code unit follows */
+ if (f + 1 >= (const uint8_t*) s + length)
+ break;
+
+ w2 = f[1] << 8 | f[0];
+ f += 2;
+
+ if (!utf16_is_trailing_surrogate(w2)) {
+ /* Rewind so that w2 is processed again as the start of the next character */
+ f -= 2;
+ continue; /* surrogate missing its trailing surrogate, ignore */
+ }
+
+ t += utf8_encode_unichar(t, utf16_surrogate_pair_to_unichar(w1, w2));
+ }
+
+ *t = 0;
+ return r;
+}
+
+/* Encodes one code point as UTF-16 into out, storing each code unit via htole16() (i.e. little-endian).
+ * Returns the number of code units written: 1 for 0..0xd7ff and 0xe000..0xffff, 2 (a surrogate pair)
+ * for 0x10000..0x10ffff, and 0 for anything else (surrogate values or out-of-range input), in which
+ * case nothing is written. */
+size_t utf16_encode_unichar(char16_t *out, char32_t c) {
+
+        if (c <= 0xd7ffU || (c >= 0xe000U && c <= 0xffffU)) {
+                out[0] = htole16(c);
+                return 1;
+        }
+
+        if (c >= 0x10000U && c <= 0x10ffffU) {
+                char32_t offset = c - 0x10000U;
+
+                out[0] = htole16((offset >> 10) + 0xd800U);
+                out[1] = htole16((offset & 0x3ffU) + 0xdc00U);
+                return 2;
+        }
+
+        /* A surrogate or a value beyond 0x10ffff (invalid) */
+        return 0;
+}
+
+/* Converts the first 'length' bytes of s from UTF-8 into a newly allocated, zero-terminated UTF-16
+ * array (code units stored via htole16()). Single-byte characters, bytes that cannot start a sequence,
+ * sequences that would extend past 'length', and sequences that fail to decode are copied through
+ * byte by byte, one code unit per byte. Returns NULL if the allocation fails. */
+char16_t *utf8_to_utf16(const char *s, size_t length) {
+        char16_t *buf, *out;
+        size_t pos = 0;
+
+        assert(s);
+
+        buf = new(char16_t, length + 1);
+        if (!buf)
+                return NULL;
+
+        out = buf;
+
+        while (pos < length) {
+                char32_t unichar;
+                size_t need;
+
+                need = utf8_encoded_expected_len(s + pos);
+
+                /* Only a complete, decodable multi-byte sequence is transcoded */
+                if (need > 1 &&
+                    pos + need <= length &&
+                    utf8_encoded_to_unichar(s + pos, &unichar) >= 0) {
+                        out += utf16_encode_unichar(out, unichar);
+                        pos += need;
+                } else
+                        *(out++) = htole16(s[pos++]);
+        }
+
+        *out = 0;
+        return buf;
+}
+
+/* Counts the 16-bit code units preceding the terminating zero unit (units, not bytes). */
+size_t char16_strlen(const char16_t *s) {
+        const char16_t *end;
+
+        assert(s);
+
+        for (end = s; *end != 0; end++)
+                ;
+
+        return (size_t) (end - s);
+}
+
+/* Number of bytes (1..6) needed to encode the given code point in UTF-8, based on its magnitude. */
+static int utf8_unichar_to_encoded_len(char32_t unichar) {
+        /* limits[k] is the first value that no longer fits into k + 1 bytes */
+        static const char32_t limits[] = { 0x80, 0x800, 0x10000, 0x200000, 0x4000000 };
+        int k;
+
+        for (k = 0; k < 5; k++)
+                if (unichar < limits[k])
+                        return k + 1;
+
+        return 6;
+}
+
+/* Validates the single UTF-8 sequence starting at str. Returns its length in bytes if it is well
+ * formed, uses the shortest possible encoding, and decodes to a code point accepted by
+ * unichar_is_valid(); returns a negative errno-style value otherwise. */
+int utf8_encoded_valid_unichar(const char *str) {
+        char32_t decoded;
+        size_t n, k;
+        int r;
+
+        assert(str);
+
+        n = utf8_encoded_expected_len(str);
+        switch (n) {
+        case 0:
+                return -EINVAL;
+        case 1:
+                /* ascii is valid */
+                return 1;
+        default:
+                break;
+        }
+
+        /* Every byte of a multi-byte sequence has the top bit set; this also stops at a NUL */
+        for (k = 0; k < n; k++)
+                if ((str[k] & 0x80) == 0)
+                        return -EINVAL;
+
+        r = utf8_encoded_to_unichar(str, &decoded);
+        if (r < 0)
+                return r;
+
+        /* Reject encodings whose length does not match what the decoded value requires */
+        if (utf8_unichar_to_encoded_len(decoded) != (int) n)
+                return -EINVAL;
+
+        if (!unichar_is_valid(decoded))
+                return -EINVAL;
+
+        return (int) n;
+}
+
+/* Counts the UTF-8 code points in the NUL-terminated string. Returns (size_t) -1 if any sequence fails
+ * utf8_encoded_valid_unichar(). */
+size_t utf8_n_codepoints(const char *str) {
+        size_t count;
+
+        for (count = 0; *str != 0; count++) {
+                int step;
+
+                step = utf8_encoded_valid_unichar(str);
+                if (step < 0)
+                        return (size_t) -1;
+
+                str += step;
+        }
+
+        return count;
+}
+
+/* Estimates how many character cells the string occupies on a terminal/console: 2 for each code point
+ * unichar_iswide() reports as wide, 1 for every other. Returns (size_t) -1 if a sequence cannot be
+ * decoded. */
+size_t utf8_console_width(const char *str) {
+        size_t width = 0;
+
+        for (; *str != 0; str = utf8_next_char(str)) {
+                char32_t cp;
+
+                if (utf8_encoded_to_unichar(str, &cp) < 0)
+                        return (size_t) -1;
+
+                width += unichar_iswide(cp) ? 2 : 1;
+        }
+
+        return width;
+}
diff --git a/src/basic/utf8.h b/src/basic/utf8.h
new file mode 100644
index 0000000..6284569
--- /dev/null
+++ b/src/basic/utf8.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <uchar.h>
+
+#include "macro.h"
+#include "missing_type.h"
+
+#define UTF8_REPLACEMENT_CHARACTER "\xef\xbf\xbd"
+#define UTF8_BYTE_ORDER_MARK "\xef\xbb\xbf"
+
+bool unichar_is_valid(char32_t c);
+
+char *utf8_is_valid(const char *s) _pure_;
+char *ascii_is_valid(const char *s) _pure_;
+char *ascii_is_valid_n(const char *str, size_t len);
+
+bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pure_;
+#define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true)
+
+char *utf8_escape_invalid(const char *s);
+char *utf8_escape_non_printable(const char *str);
+
+size_t utf8_encode_unichar(char *out_utf8, char32_t g);
+size_t utf16_encode_unichar(char16_t *out, char32_t c);
+
+char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */);
+char16_t *utf8_to_utf16(const char *s, size_t length);
+
+size_t char16_strlen(const char16_t *s); /* returns the number of 16bit words in the string (not bytes!) */
+
+int utf8_encoded_valid_unichar(const char *str);
+int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar);
+
+/* True if the code unit lies in the surrogate range 0xd800..0xdfff (leading or trailing). */
+static inline bool utf16_is_surrogate(char16_t c) {
+        return !(c < 0xd800U || c > 0xdfffU);
+}
+
+/* True if the code unit lies in the trailing (low) surrogate range 0xdc00..0xdfff. */
+static inline bool utf16_is_trailing_surrogate(char16_t c) {
+        return !(c < 0xdc00U || c > 0xdfffU);
+}
+
+/* Combines a leading and a trailing surrogate into the code point they encode. The arguments are not
+ * validated here. */
+static inline char32_t utf16_surrogate_pair_to_unichar(char16_t lead, char16_t trail) {
+        char32_t high = ((char32_t) lead - 0xd800U) << 10;
+        char32_t low = (char32_t) trail - 0xdc00U;
+
+        return high + low + 0x10000U;
+}
+
+size_t utf8_n_codepoints(const char *str);
+size_t utf8_console_width(const char *str);
diff --git a/src/basic/util.c b/src/basic/util.c
new file mode 100644
index 0000000..e577c93
--- /dev/null
+++ b/src/basic/util.c
@@ -0,0 +1,637 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/statfs.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "build.h"
+#include "cgroup-util.h"
+#include "def.h"
+#include "device-nodes.h"
+#include "dirent-util.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "procfs-util.h"
+#include "set.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+int saved_argc = 0;
+char **saved_argv = NULL;
+static int saved_in_initrd = -1;
+
+/* Returns the page size reported by sysconf(_SC_PAGESIZE). The value is looked up once and then cached
+ * in a thread_local static. */
+size_t page_size(void) {
+        static thread_local size_t cached = 0;
+
+        if (!_likely_(cached > 0)) {
+                long r;
+
+                r = sysconf(_SC_PAGESIZE);
+                assert(r > 0);
+
+                cached = (size_t) r;
+        }
+
+        return cached;
+}
+
+/* Reports whether /run/plymouth/pid exists, which is taken as the sign that plymouth is running. */
+bool plymouth_running(void) {
+        if (access("/run/plymouth/pid", F_OK) < 0)
+                return false;
+
+        return true;
+}
+
+/* True if the display string starts with ':' immediately followed by a decimal digit (e.g. ":0"). */
+bool display_is_local(const char *display) {
+        assert(display);
+
+        if (display[0] != ':')
+                return false;
+
+        return display[1] >= '0' && display[1] <= '9';
+}
+
+/* True if the first line of /sys/kernel/kexec_loaded starts with '1'. Returns false if the file
+ * cannot be read. */
+bool kexec_loaded(void) {
+        _cleanup_free_ char *line = NULL;
+        int r;
+
+        r = read_one_line_file("/sys/kernel/kexec_loaded", &line);
+        if (r < 0)
+                return false;
+
+        return line[0] == '1';
+}
+
+/* Maps the access mode of open() flags to the matching mmap() protection bits: O_RDONLY gives
+ * PROT_READ, O_WRONLY gives PROT_WRITE, O_RDWR gives both. Any other access mode yields -EINVAL. */
+int prot_from_flags(int flags) {
+        int mode = flags & O_ACCMODE;
+
+        if (mode == O_RDONLY)
+                return PROT_READ;
+
+        if (mode == O_WRONLY)
+                return PROT_WRITE;
+
+        if (mode == O_RDWR)
+                return PROT_READ|PROT_WRITE;
+
+        return -EINVAL;
+}
+
+/* Tells whether we are running inside an initrd. The answer is computed once and cached in
+ * saved_in_initrd. */
+bool in_initrd(void) {
+        struct statfs s;
+        int r;
+
+        if (saved_in_initrd >= 0)
+                return saved_in_initrd;
+
+        /* An explicit $SYSTEMD_IN_INITRD setting takes precedence over detection */
+        r = getenv_bool_secure("SYSTEMD_IN_INITRD");
+        if (r >= 0) {
+                saved_in_initrd = r > 0;
+                return saved_in_initrd;
+        }
+
+        if (r != -ENXIO)
+                log_debug_errno(r, "Failed to parse $SYSTEMD_IN_INITRD, ignoring: %m");
+
+        /* Detection makes two checks:
+         *
+         * 1. the flag file /etc/initrd-release must exist
+         * 2. the root file system must be a memory file system
+         *
+         * The second check is extra paranoia, since misdetecting an initrd can have bad
+         * consequences due to the initrd being emptied when transitioning to the main systemd.
+         */
+        saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
+                          statfs("/", &s) >= 0 &&
+                          is_temporary_fs(&s);
+
+        return saved_in_initrd;
+}
+
+/* Overrides the cached in_initrd() result with the given value. */
+void in_initrd_force(bool value) {
+        int forced = value;
+
+        saved_in_initrd = forced;
+}
+
+/* Binary search like bsearch(), but the comparison callback receives an extra user pointer (glibc's
+ * bsearch() offers no such variant). Searches nmemb elements of 'size' bytes starting at base and
+ * returns the matching element, or NULL if none compares equal. */
+void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
+                 __compar_d_fn_t compar, void *arg) {
+        size_t lo = 0, hi = nmemb;
+
+        assert(!size_multiply_overflow(nmemb, size));
+
+        /* Invariant: a match, if any, lies within [lo, hi) */
+        while (lo < hi) {
+                size_t mid = (lo + hi) / 2;
+                const void *elem = (const uint8_t*) base + mid * size;
+                int c = compar(key, elem, arg);
+
+                if (c == 0)
+                        return (void *)elem;
+
+                if (c < 0)
+                        hi = mid;
+                else
+                        lo = mid + 1;
+        }
+
+        return NULL;
+}
+
+/* Does the buffer consist entirely of NULs?
+ *
+ * Copied from https://github.com/systemd/casync/, copied in turn from
+ * https://github.com/rustyrussell/ccan/blob/master/ccan/mem/mem.c#L92,
+ * which is licensed CC-0.
+ */
+bool memeqzero(const void *data, size_t length) {
+        const uint8_t *bytes = data;
+        size_t head, k;
+
+        /* Check the first (up to) 16 bytes one by one */
+        head = length < 16 ? length : 16;
+        for (k = 0; k < head; k++)
+                if (bytes[k])
+                        return false;
+
+        if (length <= 16)
+                return true;
+
+        /* The first 16 bytes are NUL, so comparing the buffer against itself shifted by 16 bytes
+         * proves that the remainder is NUL as well. */
+        return memcmp(data, bytes + 16, length - 16) == 0;
+}
+
+/* Determines whether the system runs on AC power by scanning /sys/class/power_supply. Only supplies
+ * whose "type" file reads exactly "Mains\n" are considered. Returns a positive value if a mains supply
+ * is online, or if no offline mains supply was found at all (this includes the directory not
+ * existing); returns 0 if only offline mains supplies were found; returns a negative errno-style value
+ * on failure. */
+int on_ac_power(void) {
+ bool found_offline = false, found_online = false;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir("/sys/class/power_supply");
+ if (!d)
+ return errno == ENOENT ? true : -errno;
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ _cleanup_close_ int fd = -1, device = -1;
+ char contents[6];
+ ssize_t n;
+
+ device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (device < 0) {
+ /* Entry vanished or is not a directory: skip it */
+ if (IN_SET(errno, ENOENT, ENOTDIR))
+ continue;
+
+ return -errno;
+ }
+
+ fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ n = read(fd, contents, sizeof(contents));
+ if (n < 0)
+ return -errno;
+
+ /* Anything that is not a mains supply is ignored */
+ if (n != 6 || memcmp(contents, "Mains\n", 6))
+ continue;
+
+ /* Close the "type" fd explicitly before the variable is reused for "online" */
+ safe_close(fd);
+ fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ n = read(fd, contents, sizeof(contents));
+ if (n < 0)
+ return -errno;
+
+ /* Expect exactly one character followed by a newline */
+ if (n != 2 || contents[1] != '\n')
+ return -EIO;
+
+ if (contents[0] == '1') {
+ /* One online mains supply settles the question */
+ found_online = true;
+ break;
+ } else if (contents[0] == '0')
+ found_offline = true;
+ else
+ return -EIO;
+ }
+
+ return found_online || !found_offline;
+}
+
+/* Looks up the leader PID of a container by reading LEADER= and CLASS= from
+ * /run/systemd/machines/<machine>. The special name ".host" resolves to PID 1. Returns 0 and stores
+ * the PID in *pid on success. Errors: -EINVAL for an invalid machine name, -EHOSTDOWN if the machine
+ * file does not exist, -EIO if LEADER is missing, the class is not "container" or the parsed PID is
+ * <= 1; other failures of the helpers are passed through. */
+int container_get_leader(const char *machine, pid_t *pid) {
+        _cleanup_free_ char *leader_str = NULL, *class = NULL;
+        const char *path;
+        pid_t leader;
+        int r;
+
+        assert(machine);
+        assert(pid);
+
+        if (streq(machine, ".host")) {
+                *pid = 1;
+                return 0;
+        }
+
+        if (!machine_name_is_valid(machine))
+                return -EINVAL;
+
+        path = strjoina("/run/systemd/machines/", machine);
+        r = parse_env_file(NULL, path,
+                           "LEADER", &leader_str,
+                           "CLASS", &class);
+        if (r == -ENOENT)
+                return -EHOSTDOWN;
+        if (r < 0)
+                return r;
+
+        if (!leader_str || !streq_ptr(class, "container"))
+                return -EIO;
+
+        r = parse_pid(leader_str, &leader);
+        if (r < 0)
+                return r;
+        if (leader <= 1)
+                return -EIO;
+
+        *pid = leader;
+        return 0;
+}
+
+/* Opens file descriptors for the namespaces and the root directory of process 'pid' via its /proc
+ * entries. Only the items whose output pointer is non-NULL are opened. Returns 0 on success with the
+ * descriptors stored in the output pointers, or a negative errno-style value on failure, in which case
+ * no output pointer is written. A missing ns/user entry (ENOENT) is tolerated: *userns_fd is then set
+ * to -1. */
+int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
+ int rfd = -1;
+
+ assert(pid >= 0);
+
+ if (mntns_fd) {
+ const char *mntns;
+
+ mntns = procfs_file_alloca(pid, "ns/mnt");
+ mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (mntnsfd < 0)
+ return -errno;
+ }
+
+ if (pidns_fd) {
+ const char *pidns;
+
+ pidns = procfs_file_alloca(pid, "ns/pid");
+ pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (pidnsfd < 0)
+ return -errno;
+ }
+
+ if (netns_fd) {
+ const char *netns;
+
+ netns = procfs_file_alloca(pid, "ns/net");
+ netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (netnsfd < 0)
+ return -errno;
+ }
+
+ if (userns_fd) {
+ const char *userns;
+
+ userns = procfs_file_alloca(pid, "ns/user");
+ usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ /* ENOENT is not treated as an error here; usernsfd simply stays negative */
+ if (usernsfd < 0 && errno != ENOENT)
+ return -errno;
+ }
+
+ /* The root fd is opened last and nothing can fail after it, so it needs no cleanup handling */
+ if (root_fd) {
+ const char *root;
+
+ root = procfs_file_alloca(pid, "root");
+ rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (rfd < 0)
+ return -errno;
+ }
+
+ if (pidns_fd)
+ *pidns_fd = pidnsfd;
+
+ if (mntns_fd)
+ *mntns_fd = mntnsfd;
+
+ if (netns_fd)
+ *netns_fd = netnsfd;
+
+ if (userns_fd)
+ *userns_fd = usernsfd;
+
+ if (root_fd)
+ *root_fd = rfd;
+
+ /* Ownership has passed to the caller: disarm the _cleanup_close_ handlers */
+ pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
+
+ return 0;
+}
+
+/* Enters the given namespaces with setns() in the order PID, mount, network, user, then changes the
+ * root directory to root_fd, and finally calls reset_uid_gid(). Negative descriptors are skipped.
+ * Returns the result of reset_uid_gid() on success, or a negative errno-style value as soon as a step
+ * fails. */
+int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
+        if (userns_fd >= 0) {
+                /* Can't setns to your own userns, since then you could
+                 * escalate from non-root to root in your own namespace, so
+                 * check if namespaces equal before attempting to enter. */
+                _cleanup_free_ char *fd_path = NULL;
+                int same;
+
+                if (asprintf(&fd_path, "/proc/self/fd/%d", userns_fd) < 0)
+                        return -ENOMEM;
+
+                same = files_same(fd_path, "/proc/self/ns/user", 0);
+                if (same < 0)
+                        return same;
+                if (same > 0)
+                        userns_fd = -1;
+        }
+
+        if (pidns_fd >= 0 && setns(pidns_fd, CLONE_NEWPID) < 0)
+                return -errno;
+
+        if (mntns_fd >= 0 && setns(mntns_fd, CLONE_NEWNS) < 0)
+                return -errno;
+
+        if (netns_fd >= 0 && setns(netns_fd, CLONE_NEWNET) < 0)
+                return -errno;
+
+        if (userns_fd >= 0 && setns(userns_fd, CLONE_NEWUSER) < 0)
+                return -errno;
+
+        if (root_fd >= 0) {
+                if (fchdir(root_fd) < 0)
+                        return -errno;
+
+                if (chroot(".") < 0)
+                        return -errno;
+        }
+
+        return reset_uid_gid();
+}
+
+/* Returns the amount of memory available to the system in bytes: the physical memory reported by
+ * sysconf(), or the root cgroup's memory limit if that is lower. If the cgroup limit cannot be
+ * determined or is unlimited, the plain physical amount is returned. */
+uint64_t physical_memory(void) {
+ _cleanup_free_ char *root = NULL, *value = NULL;
+ uint64_t mem, lim;
+ size_t ps;
+ long sc;
+ int r;
+
+ /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
+ * memory.
+ *
+ * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
+ * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
+
+ sc = sysconf(_SC_PHYS_PAGES);
+ assert(sc > 0);
+
+ ps = page_size();
+ mem = (uint64_t) sc * (uint64_t) ps;
+
+ r = cg_get_root_path(&root);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
+ return mem;
+ }
+
+ /* The attribute holding the limit is memory.max if cg_all_unified() is positive, and
+ * memory.limit_in_bytes otherwise */
+ r = cg_all_unified();
+ if (r < 0) {
+ log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
+ return mem;
+ }
+ if (r > 0) {
+ r = cg_get_attribute("memory", root, "memory.max", &value);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
+ return mem;
+ }
+
+ /* The literal "max" means no limit is configured */
+ if (streq(value, "max"))
+ return mem;
+ } else {
+ r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
+ return mem;
+ }
+ }
+
+ r = safe_atou64(value, &lim);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value);
+ return mem;
+ }
+ /* UINT64_MAX is treated as "no limit" as well */
+ if (lim == UINT64_MAX)
+ return mem;
+
+ /* Make sure the limit is a multiple of our own page size */
+ lim /= ps;
+ lim *= ps;
+
+ return MIN(mem, lim);
+}
+
+/* Computes physical_memory() * v / max, working in units of pages so that the result is a multiple of
+ * the page size (rounded down). Returns UINT64_MAX if an intermediate multiplication overflows. */
+uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
+        uint64_t pgsz, pages, scaled, bytes;
+
+        assert(max > 0);
+
+        pgsz = page_size();
+        assert(pgsz > 0);
+
+        pages = physical_memory() / pgsz;
+        assert(pages > 0);
+
+        scaled = pages * v;
+        if (scaled / pages != v) /* overflow? */
+                return UINT64_MAX;
+
+        scaled /= max;
+
+        bytes = scaled * pgsz;
+        return bytes / pgsz == scaled ? bytes : UINT64_MAX;
+}
+
+/* Determines the maximum number of tasks that may run on this system as the smallest of:
+ *
+ * a) TASKS_MAX
+ * b) the limit reported by procfs_tasks_get_limit()
+ * c) the pids.max attribute of the root cgroup (unless it reads "max")
+ *
+ * A source that cannot be read, or that yields 0, is treated as TASKS_MAX. */
+uint64_t system_tasks_max(void) {
+        uint64_t procfs_limit = TASKS_MAX, cgroup_limit = TASKS_MAX;
+        _cleanup_free_ char *root = NULL;
+        int r;
+
+        r = procfs_tasks_get_limit(&procfs_limit);
+        if (r < 0)
+                log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m");
+
+        r = cg_get_root_path(&root);
+        if (r >= 0) {
+                _cleanup_free_ char *value = NULL;
+
+                r = cg_get_attribute("pids", root, "pids.max", &value);
+                if (r < 0)
+                        log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
+                else if (!streq(value, "max")) {
+                        r = safe_atou64(value, &cgroup_limit);
+                        if (r < 0)
+                                log_debug_errno(r, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
+                }
+        } else
+                log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m");
+
+        return MIN3(TASKS_MAX,
+                    procfs_limit <= 0 ? TASKS_MAX : procfs_limit,
+                    cgroup_limit <= 0 ? TASKS_MAX : cgroup_limit);
+}
+
+/* Scales the system's task limit by the fraction v/max; with max == 100 this computes a percentage
+ * of system_tasks_max(). Returns UINT64_MAX if the multiplication overflows. */
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
+        uint64_t limit, product;
+
+        assert(max > 0);
+
+        limit = system_tasks_max();
+        assert(limit > 0);
+
+        product = limit * v;
+        if (product / limit == v)
+                return product / max;
+
+        /* The multiplication wrapped around */
+        return UINT64_MAX;
+}
+
+/* Prints the version banner followed by the compiled-in feature list to stdout. Always returns 0. */
+int version(void) {
+        static const char banner[] =
+                "systemd " STRINGIFY(PROJECT_VERSION) " (" GIT_VERSION ")\n"
+                SYSTEMD_FEATURES;
+
+        puts(banner);
+        return 0;
+}
+
+/* This is a direct translation of str_verscmp from boot.c */
+/* True for the ASCII decimal digits '0'..'9'. */
+static bool is_digit(int c) {
+        return !(c < '0' || c > '9');
+}
+
+/* Sort weight of a character for str_verscmp(): NUL and digits weigh 0, lowercase ASCII letters weigh
+ * their own value, and every other character weighs its value plus 0x10000. */
+static int c_order(int c) {
+        if (c >= 'a' && c <= 'z')
+                return c;
+
+        if (c == 0 || is_digit(c))
+                return 0;
+
+        return c + 0x10000;
+}
+
+/* Compares two version strings piecewise, alternating between non-digit runs (ordered by c_order())
+ * and digit runs (ordered numerically, leading zeroes ignored). Returns a negative value, zero or a
+ * positive value if s1 sorts before, equal to or after s2. If no piece differs, the result of a plain
+ * strcmp() on the original strings is returned. */
+int str_verscmp(const char *s1, const char *s2) {
+ const char *os1, *os2;
+
+ assert(s1);
+ assert(s2);
+
+ /* Remember the start of both strings for the final strcmp() tie-breaker */
+ os1 = s1;
+ os2 = s2;
+
+ while (*s1 || *s2) {
+ int first;
+
+ /* Compare the non-digit run character by character until both sides are at a digit or at the end */
+ while ((*s1 && !is_digit(*s1)) || (*s2 && !is_digit(*s2))) {
+ int order;
+
+ order = c_order(*s1) - c_order(*s2);
+ if (order != 0)
+ return order;
+ s1++;
+ s2++;
+ }
+
+ /* Skip leading zeroes of the digit run */
+ while (*s1 == '0')
+ s1++;
+ while (*s2 == '0')
+ s2++;
+
+ /* Walk both digit runs in lockstep, remembering the first differing digit */
+ first = 0;
+ while (is_digit(*s1) && is_digit(*s2)) {
+ if (first == 0)
+ first = *s1 - *s2;
+ s1++;
+ s2++;
+ }
+
+ /* The side with digits left over has the longer, hence larger, number */
+ if (is_digit(*s1))
+ return 1;
+ if (is_digit(*s2))
+ return -1;
+
+ /* Same number of digits: the first differing digit decides */
+ if (first != 0)
+ return first;
+ }
+
+ return strcmp(os1, os2);
+}
+
+/* Turns off core dumps by writing "|/bin/false" to /proc/sys/kernel/core_pattern, unless
+ * detect_container() reports that we are running in a container. A failed write is only logged at
+ * debug level. */
+void disable_coredumps(void) {
+        int r;
+
+        if (detect_container() <= 0) {
+                r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", WRITE_STRING_FILE_DISABLE_BUFFER);
+                if (r < 0)
+                        log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");
+        }
+}
diff --git a/src/basic/util.h b/src/basic/util.h
new file mode 100644
index 0000000..dc33d66
--- /dev/null
+++ b/src/basic/util.h
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "format-util.h"
+#include "macro.h"
+#include "time-util.h"
+
+size_t page_size(void) _pure_;
+#define PAGE_ALIGN(l) ALIGN_TO((l), page_size()) /* aligns l to a multiple of page_size() via ALIGN_TO() -- presumably rounding up; confirm in macro.h */
+
+static inline const char* yes_no(bool b) {
+        /* Renders a boolean as the constant string "yes" or "no". */
+        if (b)
+                return "yes";
+
+        return "no";
+}
+
+static inline const char* true_false(bool b) {
+        /* Renders a boolean as the constant string "true" or "false". */
+        if (!b)
+                return "false";
+
+        return "true";
+}
+
+static inline const char* one_zero(bool b) {
+        /* Renders a boolean as the constant string "1" or "0". */
+        const char *s = "0";
+
+        if (b)
+                s = "1";
+
+        return s;
+}
+
+static inline const char* enable_disable(bool b) {
+        /* Renders a boolean as the constant string "enable" or "disable". */
+        if (b)
+                return "enable";
+        else
+                return "disable";
+}
+
+bool plymouth_running(void);
+
+bool display_is_local(const char *display) _pure_;
+
+#define NULSTR_FOREACH(i, l) /* iterates i over the NUL-separated strings packed back to back in l; stops at the first empty string, or at once if l is NULL */ \
+ for ((i) = (l); (i) && *(i); (i) = strchr((i), 0)+1)
+ /* Same, but walks the strings two at a time as (i, j). Note that strchr((i), 0) is evaluated before i is checked, so l must not be NULL here. */
+#define NULSTR_FOREACH_PAIR(i, j, l) \
+ for ((i) = (l), (j) = strchr((i), 0)+1; (i) && *(i); (i) = strchr((j), 0)+1, (j) = *(i) ? strchr((i), 0)+1 : (i))
+
+extern int saved_argc;
+extern char **saved_argv;
+
+bool kexec_loaded(void);
+
+int prot_from_flags(int flags) _const_;
+
+bool in_initrd(void);
+void in_initrd_force(bool value);
+
+void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
+ __compar_d_fn_t compar, void *arg);
+ /* Type-checked front end for xbsearch_r(): the element size is derived from b[0], and k and func are first assigned to temporaries typed after the array element, so a mismatching key or comparator is diagnosed by the compiler before the casts. */
+#define typesafe_bsearch_r(k, b, n, func, userdata) \
+ ({ \
+ const typeof(b[0]) *_k = k; \
+ int (*_func_)(const typeof(b[0])*, const typeof(b[0])*, typeof(userdata)) = func; \
+ xbsearch_r((const void*) _k, (b), (n), sizeof((b)[0]), (__compar_d_fn_t) _func_, userdata); \
+ })
+
+/**
+ * Plain bsearch() requires base to be non-NULL. This wrapper requires
+ * that only if nmemb > 0; for an empty array it returns NULL right away.
+ */
+static inline void* bsearch_safe(const void *key, const void *base,
+                                 size_t nmemb, size_t size, __compar_fn_t compar) {
+        if (nmemb > 0) {
+                assert(base);
+                return bsearch(key, base, nmemb, size, compar);
+        }
+
+        return NULL;
+}
+
+#define typesafe_bsearch(k, b, n, func) /* type-checked bsearch_safe(): element size comes from b[0]; k and func must match the element type or the typed temporaries below fail to compile cleanly */ \
+ ({ \
+ const typeof(b[0]) *_k = k; \
+ int (*_func_)(const typeof(b[0])*, const typeof(b[0])*) = func; \
+ bsearch_safe((const void*) _k, (b), (n), sizeof((b)[0]), (__compar_fn_t) _func_); \
+ })
+
+/**
+ * Plain qsort() requires base to be non-NULL. This wrapper requires that
+ * only if there is more than one element, i.e. if there is anything to sort.
+ */
+static inline void qsort_safe(void *base, size_t nmemb, size_t size, __compar_fn_t compar) {
+        if (nmemb > 1) {
+                assert(base);
+                qsort(base, nmemb, size, compar);
+        }
+}
+
+/* A wrapper around the above, but that adds typesafety: the element size is automatically derived from the type and so
+ * is the prototype for the comparison function. func is first assigned to a function pointer typed after p[0], so a
+ * comparator with the wrong argument types is diagnosed by the compiler before the cast to __compar_fn_t. */
+#define typesafe_qsort(p, n, func) \
+ ({ \
+ int (*_func_)(const typeof(p[0])*, const typeof(p[0])*) = func; \
+ qsort_safe((p), (n), sizeof((p)[0]), (__compar_fn_t) _func_); \
+ })
+
+static inline void qsort_r_safe(void *base, size_t nmemb, size_t size, __compar_d_fn_t compar, void *userdata) {
+        /* qsort_r() that tolerates a NULL base as long as there is at most one element, i.e. nothing to sort. */
+        if (nmemb > 1) {
+                assert(base);
+                qsort_r(base, nmemb, size, compar, userdata);
+        }
+}
+
+#define typesafe_qsort_r(p, n, func, userdata) /* type-checked qsort_r_safe(): element size comes from p[0]; func must take two element pointers plus an argument of userdata's type */ \
+ ({ \
+ int (*_func_)(const typeof(p[0])*, const typeof(p[0])*, typeof(userdata)) = func; \
+ qsort_r_safe((p), (n), sizeof((p)[0]), (__compar_d_fn_t) _func_, userdata); \
+ })
+
+/* Plain memcpy() requires src to be non-NULL even when nothing is copied. Here nothing is done at all
+ * if n is 0, so src may be NULL in that case. */
+static inline void memcpy_safe(void *dst, const void *src, size_t n) {
+        if (n > 0) {
+                assert(src);
+                memcpy(dst, src, n);
+        }
+}
+
+/* Plain memcmp() requires s1 and s2 to be non-NULL. Here two zero-length regions simply compare equal (0)
+ * and the pointers are not looked at. */
+static inline int memcmp_safe(const void *s1, const void *s2, size_t n) {
+        if (n > 0) {
+                assert(s1);
+                assert(s2);
+                return memcmp(s1, s2, n);
+        }
+
+        return 0;
+}
+
+/* Lexicographic comparison of two buffers of possibly different length: the common prefix decides, and
+ * if that is identical the lengths are compared via CMP(). */
+static inline int memcmp_nn(const void *s1, size_t n1, const void *s2, size_t n2) {
+        int r;
+
+        r = memcmp_safe(s1, s2, MIN(n1, n2));
+        if (r != 0)
+                return r;
+
+        return CMP(n1, n2);
+}
+
+int on_ac_power(void);
+
+#define memzero(x,l) /* zeroes l bytes at x and evaluates to x; memset() is not called at all when l is 0 */ \
+ ({ \
+ size_t _l_ = (l); \
+ void *_x_ = (x); \
+ _l_ == 0 ? _x_ : memset(_x_, 0, _l_); \
+ })
+ /* zero() clears a whole object in place: pass the object itself (its address and sizeof are taken here), not a pointer to it */
+#define zero(x) (memzero(&(x), sizeof(x)))
+
+bool memeqzero(const void *data, size_t length);
+ /* eqzero() passes x and sizeof(x) to memeqzero(), so x has to be a real array: with a plain pointer only sizeof(pointer) bytes would be checked */
+#define eqzero(x) memeqzero(x, sizeof(x))
+
+static inline void *mempset(void *s, int c, size_t n) {
+        /* Like memset(), but returns the address just past the filled region instead of its start. */
+        uint8_t *start = s;
+
+        memset(start, c, n);
+        return start + n;
+}
+
+static inline void _reset_errno_(int *saved_errno) {
+        /* Cleanup handler of PROTECT_ERRNO: puts the saved value back into errno, unless the slot was
+         * invalidated (set negative) by UNPROTECT_ERRNO in the meantime. */
+        if (*saved_errno >= 0)
+                errno = *saved_errno;
+}
+
+#define PROTECT_ERRNO /* snapshots errno into _saved_errno_; _reset_errno_() is attached via _cleanup_() and -- presumably when the enclosing scope is left -- writes the snapshot back */ \
+ _cleanup_(_reset_errno_) _unused_ int _saved_errno_ = errno
+ /* Ends the protection early: restores errno right away and sets the snapshot to -1, which makes _reset_errno_() a no-op later. Only usable in a scope where PROTECT_ERRNO declared _saved_errno_. */
+#define UNPROTECT_ERRNO \
+ do { \
+ errno = _saved_errno_; \
+ _saved_errno_ = -1; \
+ } while (false)
+
+static inline int negative_errno(void) {
+ /* This helper should be used to shut up gcc if you know 'errno' is
+ * positive. Instead of "return -errno;", use "return negative_errno();"
+ * It will suppress bogus gcc warnings in case it assumes 'errno' might
+ * be 0 and thus the caller's error-handling might not be triggered.
+ * The result is hence always negative: -errno, or -EINVAL as passed to
+ * assert_return() for the case that errno is not positive after all. */
+ assert_return(errno > 0, -EINVAL);
+ return -errno;
+}
+
+static inline unsigned u64log2(uint64_t n) {
+        /* Index of the highest set bit, i.e. floor(log2(n)). Both 0 and 1 yield 0, which also keeps 0
+         * away from __builtin_clzll(), whose result is undefined for that input. */
+#if __SIZEOF_LONG_LONG__ == 8
+        if (n <= 1)
+                return 0;
+
+        return 63U - (unsigned) __builtin_clzll(n);
+#else
+#error "Wut?"
+#endif
+}
+
+static inline unsigned u32ctz(uint32_t n) {
+        /* Number of trailing zero bits; 32 for n == 0, for which __builtin_ctz() is undefined. */
+#if __SIZEOF_INT__ == 4
+        if (n == 0)
+                return 32;
+
+        return __builtin_ctz(n);
+#else
+#error "Wut?"
+#endif
+}
+
+static inline unsigned log2i(int x) {
+        /* floor(log2(x)) for a strictly positive int: the position of its highest set bit. */
+        const int top_bit = __SIZEOF_INT__ * 8 - 1;
+
+        assert(x > 0);
+
+        return top_bit - __builtin_clz(x);
+}
+
+static inline unsigned log2u(unsigned x) {
+        /* floor(log2(x)) for a non-zero unsigned: the position of its highest set bit. */
+        const unsigned top_bit = sizeof(unsigned) * 8 - 1;
+
+        assert(x > 0);
+
+        return top_bit - (unsigned) __builtin_clz(x);
+}
+
+static inline unsigned log2u_round_up(unsigned x) {
+        /* ceil(log2(x)): the smallest n with (1 << n) >= x. x == 1 is special-cased since log2u() must
+         * not be called with x - 1 == 0. */
+        assert(x > 0);
+
+        return x == 1 ? 0 : log2u(x - 1) + 1;
+}
+
+int container_get_leader(const char *machine, pid_t *pid);
+
+int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd);
+int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd);
+
+uint64_t physical_memory(void);
+uint64_t physical_memory_scale(uint64_t v, uint64_t max);
+
+uint64_t system_tasks_max(void);
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max);
+
+int version(void);
+
+int str_verscmp(const char *s1, const char *s2);
+
+void disable_coredumps(void);
diff --git a/src/basic/virt.c b/src/basic/virt.c
new file mode 100644
index 0000000..f63f15f
--- /dev/null
+++ b/src/basic/virt.c
@@ -0,0 +1,642 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+#include <cpuid.h>
+#endif
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "virt.h"
+
+static int detect_vm_cpuid(void) {
+ /* Identifies the hypervisor through CPUID. Returns VIRTUALIZATION_NONE if CPUID leaf 1 cannot be
+ * queried or the hypervisor bit is clear (and always when not built for x86), the table entry
+ * matching the 12-character vendor signature of leaf 0x40000000, or VIRTUALIZATION_VM_OTHER if a
+ * hypervisor is flagged but its signature is not in the table. Never returns an error. */
+ /* CPUID is an x86 specific interface. */
+#if defined(__i386__) || defined(__x86_64__)
+
+ static const struct {
+ const char *cpuid;
+ int id;
+ } cpuid_vendor_table[] = {
+ { "XenVMMXenVMM", VIRTUALIZATION_XEN },
+ { "KVMKVMKVM", VIRTUALIZATION_KVM },
+ { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU },
+ /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
+ { "VMwareVMware", VIRTUALIZATION_VMWARE },
+ /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
+ { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
+ /* https://wiki.freebsd.org/bhyve */
+ { "bhyve bhyve ", VIRTUALIZATION_BHYVE },
+ { "QNXQVMBSQG", VIRTUALIZATION_QNX },
+ };
+
+ uint32_t eax, ebx, ecx, edx;
+ bool hypervisor;
+
+ /* http://lwn.net/Articles/301888/ */
+
+ /* First detect whether there is a hypervisor */
+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
+ return VIRTUALIZATION_NONE;
+
+ hypervisor = ecx & 0x80000000U; /* bit 31 of ECX */
+
+ if (hypervisor) {
+ union {
+ uint32_t sig32[3];
+ char text[13];
+ } sig = {}; /* text[] overlays the three registers so they can be read as a string; relies on the initializer leaving text[12] as the NUL terminator */
+ unsigned j;
+
+ /* There is a hypervisor, see what it is */
+ __cpuid(0x40000000U, eax, ebx, ecx, edx);
+
+ sig.sig32[0] = ebx;
+ sig.sig32[1] = ecx;
+ sig.sig32[2] = edx;
+
+ log_debug("Virtualization found, CPUID=%s", sig.text);
+
+ for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
+ if (streq(sig.text, cpuid_vendor_table[j].cpuid)) /* exact match of the whole signature */
+ return cpuid_vendor_table[j].id;
+
+ return VIRTUALIZATION_VM_OTHER;
+ }
+#endif
+ log_debug("No virtualization found in CPUID");
+
+ return VIRTUALIZATION_NONE;
+}
+
+static int detect_vm_device_tree(void) {
+#if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
+ _cleanup_free_ char *hvtype = NULL;
+ int r;
+ /* Looks for a hypervisor in the device tree exported under /proc/device-tree (ARM and PowerPC
+ * builds only; elsewhere this always returns VIRTUALIZATION_NONE). The hypervisor/compatible
+ * string decides: "linux,kvm" is KVM, anything containing "xen" is XEN, everything else is
+ * VM_OTHER. If that file is missing, a top-level node whose name contains "fw-cfg" is taken as
+ * QEMU. Returns a VIRTUALIZATION_* value or a negative errno. */
+ r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
+ if (r == -ENOENT) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+
+ dir = opendir("/proc/device-tree");
+ if (!dir) {
+ if (errno == ENOENT) { /* no device tree at all: not an error, just nothing to detect */
+ log_debug_errno(errno, "/proc/device-tree: %m");
+ return VIRTUALIZATION_NONE;
+ }
+ return -errno;
+ }
+
+ FOREACH_DIRENT(dent, dir, return -errno)
+ if (strstr(dent->d_name, "fw-cfg")) { /* substring match, the node name may carry a unit address */
+ log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
+ return VIRTUALIZATION_QEMU;
+ }
+
+ log_debug("No virtualization found in /proc/device-tree/*");
+ return VIRTUALIZATION_NONE;
+ } else if (r < 0)
+ return r;
+
+ log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
+ if (streq(hvtype, "linux,kvm"))
+ return VIRTUALIZATION_KVM;
+ else if (strstr(hvtype, "xen"))
+ return VIRTUALIZATION_XEN;
+ else
+ return VIRTUALIZATION_VM_OTHER;
+#else
+ log_debug("This platform does not support /proc/device-tree");
+ return VIRTUALIZATION_NONE;
+#endif
+}
+
+static int detect_vm_dmi(void) {
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+ /* Matches the DMI identification strings in sysfs against known hypervisor vendor prefixes (x86
+ * and ARM builds only). Files are tried in the order listed and the first prefix match wins;
+ * missing files are skipped. Returns a VIRTUALIZATION_* value, VIRTUALIZATION_NONE if nothing
+ * matched, or a negative errno if a file exists but cannot be read. */
+ static const char *const dmi_vendors[] = {
+ "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
+ "/sys/class/dmi/id/sys_vendor",
+ "/sys/class/dmi/id/board_vendor",
+ "/sys/class/dmi/id/bios_vendor"
+ };
+
+ static const struct {
+ const char *vendor;
+ int id;
+ } dmi_vendor_table[] = {
+ { "KVM", VIRTUALIZATION_KVM },
+ { "QEMU", VIRTUALIZATION_QEMU },
+ /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
+ { "VMware", VIRTUALIZATION_VMWARE },
+ { "VMW", VIRTUALIZATION_VMWARE },
+ { "innotek GmbH", VIRTUALIZATION_ORACLE },
+ { "Xen", VIRTUALIZATION_XEN },
+ { "Bochs", VIRTUALIZATION_BOCHS },
+ { "Parallels", VIRTUALIZATION_PARALLELS },
+ /* https://wiki.freebsd.org/bhyve */
+ { "BHYVE", VIRTUALIZATION_BHYVE },
+ };
+ unsigned i;
+ int r;
+
+ for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
+ _cleanup_free_ char *s = NULL;
+ unsigned j;
+
+ r = read_one_line_file(dmi_vendors[i], &s);
+ if (r < 0) {
+ if (r == -ENOENT) /* this DMI field is not exported, try the next one */
+ continue;
+
+ return r;
+ }
+
+ for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
+ if (startswith(s, dmi_vendor_table[j].vendor)) { /* prefix match, the field may carry more text */
+ log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
+ return dmi_vendor_table[j].id;
+ }
+ }
+#endif
+
+ log_debug("No virtualization found in DMI");
+
+ return VIRTUALIZATION_NONE;
+}
+
+static int detect_vm_xen(void) {
+        bool present;
+
+        /* The presence of /proc/xen indicates some form of a Xen domain. Whether that domain is Dom0 is
+         * not decided here, that check is executed later in detect_vm_xen_dom0(). */
+        present = access("/proc/xen", F_OK) >= 0;
+        if (present) {
+                log_debug("Virtualization XEN found (/proc/xen exists)");
+                return VIRTUALIZATION_XEN;
+        }
+
+        log_debug("Virtualization XEN not found, /proc/xen does not exist");
+        return VIRTUALIZATION_NONE;
+}
+
+#define XENFEAT_dom0 11 /* xen/include/public/features.h */
+#define PATH_FEATURES "/sys/hypervisor/properties/features"
+/* Tells whether the Xen domain we run in is the privileged Dom0. The feature bitmask in sysfs is
+ * consulted first (bit XENFEAT_dom0); if that file is missing or cannot be parsed, the comma-separated
+ * list in /proc/xen/capabilities is searched for "control_d" instead.
+ *
+ * Returns -errno, or 0 for domU, or 1 for dom0 */
+static int detect_vm_xen_dom0(void) {
+        _cleanup_free_ char *domcap = NULL;
+        char *cap, *i;
+        int r;
+
+        r = read_one_line_file(PATH_FEATURES, &domcap);
+        if (r < 0 && r != -ENOENT)
+                return r;
+        if (r >= 0) { /* any non-negative return is success, don't insist on exactly 0 */
+                unsigned long features;
+
+                /* Here, we need to use sscanf() instead of safe_atoul()
+                 * as the string lacks the leading "0x". */
+                r = sscanf(domcap, "%lx", &features);
+                if (r == 1) {
+                        r = !!(features & (1U << XENFEAT_dom0));
+                        log_debug("Virtualization XEN, found %s with value %08lx, "
+                                  "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
+                                  PATH_FEATURES, features, r ? "" : " not");
+                        return r;
+                }
+                log_debug("Virtualization XEN, found %s, unhandled content '%s'",
+                          PATH_FEATURES, domcap);
+
+                /* The variable is reused for the fallback below. Release the first buffer now, so that it
+                 * is not lost when the pointer is overwritten with the second line. */
+                free(domcap);
+                domcap = NULL;
+        }
+
+        r = read_one_line_file("/proc/xen/capabilities", &domcap);
+        if (r == -ENOENT) {
+                log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
+                return 0;
+        }
+        if (r < 0)
+                return r;
+
+        /* strsep() splits the line in place; cap ends up NULL if no token equals "control_d" */
+        i = domcap;
+        while ((cap = strsep(&i, ",")))
+                if (streq(cap, "control_d"))
+                        break;
+        if (!cap) {
+                log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)");
+                return 0;
+        }
+
+        log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
+        return 1;
+}
+
+static int detect_vm_hypervisor(void) {
+        _cleanup_free_ char *hvtype = NULL;
+        int r;
+
+        /* Consults /sys/hypervisor/type: "xen" is reported as XEN, any other content as VM_OTHER. If the
+         * file does not exist no hypervisor is assumed; other read errors are passed on. */
+        r = read_one_line_file("/sys/hypervisor/type", &hvtype);
+        if (r < 0)
+                return r == -ENOENT ? VIRTUALIZATION_NONE : r;
+
+        log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
+
+        return streq(hvtype, "xen") ? VIRTUALIZATION_XEN : VIRTUALIZATION_VM_OTHER;
+}
+
+static int detect_vm_uml(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+ /* Returns VIRTUALIZATION_UML if the first "vendor_id" line of /proc/cpuinfo starts with "User Mode
+ * Linux", VIRTUALIZATION_NONE if it doesn't, if there is no such line or if the file is missing,
+ * and a negative errno on other open/read failures. */
+ /* Detect User-Mode Linux by reading /proc/cpuinfo */
+ f = fopen("/proc/cpuinfo", "re");
+ if (!f) {
+ if (errno == ENOENT) {
+ log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
+ return VIRTUALIZATION_NONE;
+ }
+ return -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *t;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0) /* treated as end of file */
+ break;
+
+ t = startswith(line, "vendor_id\t: ");
+ if (t) {
+ if (startswith(t, "User Mode Linux")) {
+ log_debug("UML virtualization found in /proc/cpuinfo");
+ return VIRTUALIZATION_UML;
+ }
+
+ break; /* the first vendor_id line is decisive, no need to read on */
+ }
+ }
+
+ log_debug("UML virtualization not found in /proc/cpuinfo.");
+ return VIRTUALIZATION_NONE;
+}
+
+static int detect_vm_zvm(void) {
+ /* s390 only: looks up the "VM00 Control Program" field of /proc/sysinfo. The value "z/VM" is
+ * reported as VIRTUALIZATION_ZVM, any other value as VIRTUALIZATION_KVM. -ENOENT from the lookup
+ * means VIRTUALIZATION_NONE, other errors are returned. On all other architectures this always
+ * returns VIRTUALIZATION_NONE. */
+#if defined(__s390__)
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
+ if (r == -ENOENT)
+ return VIRTUALIZATION_NONE;
+ if (r < 0)
+ return r;
+
+ log_debug("Virtualization %s found in /proc/sysinfo", t);
+ if (streq(t, "z/VM"))
+ return VIRTUALIZATION_ZVM;
+ else
+ return VIRTUALIZATION_KVM;
+#else
+ log_debug("This platform does not support /proc/sysinfo");
+ return VIRTUALIZATION_NONE;
+#endif
+}
+
+/* Returns a short identifier for the various VM implementations: one of the VIRTUALIZATION_* VM values,
+ * VIRTUALIZATION_NONE if no VM was found, or a negative errno if one of the probes failed. A successful
+ * result is cached in a thread-local variable and returned directly on later calls; errors are not
+ * cached. */
+int detect_vm(void) {
+ static thread_local int cached_found = _VIRTUALIZATION_INVALID;
+ bool other = false; /* set once any probe reported an unidentified hypervisor (VM_OTHER) */
+ int r, dmi;
+
+ if (cached_found >= 0)
+ return cached_found;
+
+ /* We have to use the correct order here:
+ *
+ * → First, try to detect Oracle Virtualbox, even if it uses KVM, as well as Xen even if it cloaks as Microsoft
+ * Hyper-V.
+ *
+ * → Second, try to detect from CPUID, this will report KVM for whatever software is used even if info in DMI is
+ * overwritten.
+ *
+ * → Third, try to detect from DMI. */
+
+ dmi = detect_vm_dmi(); /* a negative (error) result is deliberately not checked before CPUID has been tried */
+ if (IN_SET(dmi, VIRTUALIZATION_ORACLE, VIRTUALIZATION_XEN)) {
+ r = dmi;
+ goto finish;
+ }
+
+ r = detect_vm_cpuid();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ /* Now, let's get back to DMI */
+ if (dmi < 0)
+ return dmi;
+ if (dmi == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (dmi != VIRTUALIZATION_NONE) {
+ r = dmi;
+ goto finish;
+ }
+
+ /* x86 xen will most likely be detected by cpuid. If not (most likely
+ * because we're not an x86 guest), then we should try the /proc/xen
+ * directory next. If that's not found, then we check for the high-level
+ * hypervisor sysfs file.
+ */
+
+ r = detect_vm_xen();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ r = detect_vm_hypervisor();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ r = detect_vm_device_tree();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ r = detect_vm_uml();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ r = detect_vm_zvm(); /* last probe: whatever it reports falls through to finish */
+ if (r < 0)
+ return r;
+
+finish:
+ /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
+ * In order to detect the Dom0 as not virtualization we need to
+ * double-check it */
+ if (r == VIRTUALIZATION_XEN) {
+ int dom0;
+
+ dom0 = detect_vm_xen_dom0();
+ if (dom0 < 0)
+ return dom0;
+ if (dom0 > 0)
+ r = VIRTUALIZATION_NONE;
+ } else if (r == VIRTUALIZATION_NONE && other) /* nothing specific found, but some probe saw an unknown hypervisor */
+ r = VIRTUALIZATION_VM_OTHER;
+
+ cached_found = r;
+ log_debug("Found VM virtualization %s", virtualization_to_string(r));
+ return r;
+}
+
+int detect_container(void) {
+ /* Determines the container manager we run under. Returns one of the VIRTUALIZATION_* container
+ * values, VIRTUALIZATION_CONTAINER_OTHER for an unrecognized manager, VIRTUALIZATION_NONE if no
+ * container was found, or a negative errno. A successful result is cached in a thread-local
+ * variable; errors are not cached. */
+ static const struct {
+ const char *value;
+ int id;
+ } value_table[] = {
+ { "lxc", VIRTUALIZATION_LXC },
+ { "lxc-libvirt", VIRTUALIZATION_LXC_LIBVIRT },
+ { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
+ { "docker", VIRTUALIZATION_DOCKER },
+ { "rkt", VIRTUALIZATION_RKT },
+ };
+
+ static thread_local int cached_found = _VIRTUALIZATION_INVALID;
+ _cleanup_free_ char *m = NULL;
+ const char *e = NULL; /* the manager name to translate; points either into the environment or at m */
+ unsigned j;
+ int r;
+
+ if (cached_found >= 0)
+ return cached_found;
+
+ /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
+ if (access("/proc/vz", F_OK) >= 0 &&
+ access("/proc/bc", F_OK) < 0) {
+ r = VIRTUALIZATION_OPENVZ;
+ goto finish;
+ }
+
+ if (getpid_cached() == 1) {
+ /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */
+
+ e = getenv("container");
+ if (isempty(e)) {
+ r = VIRTUALIZATION_NONE;
+ goto finish;
+ }
+
+ goto translate_name;
+ }
+
+ /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
+ * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
+ r = read_one_line_file("/run/systemd/container", &m);
+ if (r >= 0) {
+ e = m;
+ goto translate_name;
+ }
+ if (r != -ENOENT)
+ return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
+
+ /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used). */
+ r = getenv_for_pid(1, "container", &m);
+ if (r > 0) {
+ e = m;
+ goto translate_name;
+ }
+ if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
+ log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
+
+ /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown
+ * there is not 1, we know we are in a PID namespace, and hence a container. */
+ r = read_one_line_file("/proc/1/sched", &m);
+ if (r >= 0) {
+ const char *t;
+
+ t = strrchr(m, '('); /* the PID follows the last opening parenthesis of the first line */
+ if (!t)
+ return -EIO;
+
+ if (!startswith(t, "(1,")) {
+ r = VIRTUALIZATION_CONTAINER_OTHER;
+ goto finish;
+ }
+ } else if (r != -ENOENT)
+ return r;
+
+ /* If that didn't work, give up, assume no container manager. */
+ r = VIRTUALIZATION_NONE;
+ goto finish;
+
+translate_name:
+ for (j = 0; j < ELEMENTSOF(value_table); j++)
+ if (streq(e, value_table[j].value)) {
+ r = value_table[j].id;
+ goto finish;
+ }
+
+ r = VIRTUALIZATION_CONTAINER_OTHER; /* a manager name was set, but it is not one we know */
+
+finish:
+ log_debug("Found container virtualization %s.", virtualization_to_string(r));
+ cached_found = r;
+ return r;
+}
+
+int detect_virtualization(void) {
+        int r;
+
+        /* Containers take precedence: the VM detection is only consulted if detect_container() reports
+         * VIRTUALIZATION_NONE (0). A detected container as well as an error is returned as is. */
+        r = detect_container();
+        if (r != 0)
+                return r;
+
+        return detect_vm();
+}
+
+/* Inspects the first line of a uid_map/gid_map style file.
+ *
+ * Returns true if the file is empty (uninitialized user namespace) or holds anything but the full
+ * identity mapping "0 0 4294967295", false if the file does not exist or holds exactly that mapping,
+ * and a negative errno if the file cannot be opened, read or parsed. */
+static int userns_has_mapping(const char *name) {
+        _cleanup_fclose_ FILE *f = NULL;
+        _cleanup_free_ char *buf = NULL;
+        size_t n_allocated = 0;
+        ssize_t n;
+        uint32_t a, b, c;
+        int r;
+
+        f = fopen(name, "re");
+        if (!f) {
+                /* Pick up errno right away, so that the result cannot be affected by the logging call */
+                r = -errno;
+                log_debug_errno(r, "Failed to open %s: %m", name);
+                return r == -ENOENT ? false : r;
+        }
+
+        n = getline(&buf, &n_allocated, f);
+        if (n < 0) {
+                if (feof(f)) {
+                        log_debug("%s is empty, we're in an uninitialized user namespace", name);
+                        return true;
+                }
+
+                return log_debug_errno(errno, "Failed to read %s: %m", name);
+        }
+
+        r = sscanf(buf, "%"SCNu32" %"SCNu32" %"SCNu32, &a, &b, &c);
+        if (r < 3)
+                /* A matching failure does not set errno. Report a fixed error instead of whatever errno
+                 * happens to hold: a stale 0 would make this return 0, i.e. claim "no mapping". */
+                return log_debug_errno(EIO, "Failed to parse %s: %m", name);
+
+        if (a == 0 && b == 0 && c == UINT32_MAX) {
+                /* The kernel calls mappings_overlap() and does not allow overlaps */
+                log_debug("%s has a full 1:1 mapping", name);
+                return false;
+        }
+
+        /* Anything else implies that we are in a user namespace */
+        log_debug("Mapping found in %s, we're in a user namespace", name);
+        return true;
+}
+
+int running_in_userns(void) {
+        _cleanup_free_ char *line = NULL;
+        int r;
+
+        /* A non-trivial UID map or, failing that, GID map settles the question. Errors from either check
+         * are returned as well. */
+        r = userns_has_mapping("/proc/self/uid_map");
+        if (r == 0)
+                r = userns_has_mapping("/proc/self/gid_map");
+        if (r != 0)
+                return r;
+
+        /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
+         * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
+         * also does not exist. We cannot distinguish those two cases, so assume that
+         * we're running on a stripped-down recent kernel, rather than on an old one,
+         * and if the file is not found, return false.
+         */
+        r = read_one_line_file("/proc/self/setgroups", &line);
+        if (r == -ENOENT) {
+                log_debug_errno(r, "/proc/self/setgroups: %m");
+                return false;
+        }
+        if (r < 0) {
+                log_debug_errno(r, "/proc/self/setgroups: %m");
+                return r;
+        }
+
+        truncate_nl(line);
+
+        /* See user_namespaces(7) for a description of this "setgroups" contents. */
+        r = streq(line, "deny");
+        log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
+        return r;
+}
+
+int running_in_chroot(void) {
+        int r;
+
+        /* The check can be overridden: if $SYSTEMD_IGNORE_CHROOT parses as true we never report a chroot. */
+        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
+                return 0;
+
+        /* We are in a chroot if our root directory is a different file than the one of PID 1 */
+        r = files_same("/proc/1/root", "/", 0);
+
+        return r < 0 ? r : r == 0;
+}
+
+static const char *const virtualization_table[_VIRTUALIZATION_MAX] = { /* short name of each VIRTUALIZATION_* ID, indexed by the ID; first "none" and the VM types */
+ [VIRTUALIZATION_NONE] = "none",
+ [VIRTUALIZATION_KVM] = "kvm",
+ [VIRTUALIZATION_QEMU] = "qemu",
+ [VIRTUALIZATION_BOCHS] = "bochs",
+ [VIRTUALIZATION_XEN] = "xen",
+ [VIRTUALIZATION_UML] = "uml",
+ [VIRTUALIZATION_VMWARE] = "vmware",
+ [VIRTUALIZATION_ORACLE] = "oracle",
+ [VIRTUALIZATION_MICROSOFT] = "microsoft",
+ [VIRTUALIZATION_ZVM] = "zvm",
+ [VIRTUALIZATION_PARALLELS] = "parallels",
+ [VIRTUALIZATION_BHYVE] = "bhyve",
+ [VIRTUALIZATION_QNX] = "qnx",
+ [VIRTUALIZATION_VM_OTHER] = "vm-other",
+ /* ...then the container managers */
+ [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
+ [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
+ [VIRTUALIZATION_LXC] = "lxc",
+ [VIRTUALIZATION_OPENVZ] = "openvz",
+ [VIRTUALIZATION_DOCKER] = "docker",
+ [VIRTUALIZATION_RKT] = "rkt",
+ [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(virtualization, int); /* presumably expands to virtualization_to_string() and virtualization_from_string(), based on the table above */
diff --git a/src/basic/virt.h b/src/basic/virt.h
new file mode 100644
index 0000000..c4cf4bf
--- /dev/null
+++ b/src/basic/virt.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+enum {
+ VIRTUALIZATION_NONE = 0, /* has to stay 0: detect_virtualization() tests the container result with "r == 0" */
+ /* Virtual machines: everything from VM_FIRST through VM_LAST satisfies VIRTUALIZATION_IS_VM() */
+ VIRTUALIZATION_VM_FIRST,
+ VIRTUALIZATION_KVM = VIRTUALIZATION_VM_FIRST,
+ VIRTUALIZATION_QEMU,
+ VIRTUALIZATION_BOCHS,
+ VIRTUALIZATION_XEN,
+ VIRTUALIZATION_UML,
+ VIRTUALIZATION_VMWARE,
+ VIRTUALIZATION_ORACLE,
+ VIRTUALIZATION_MICROSOFT,
+ VIRTUALIZATION_ZVM,
+ VIRTUALIZATION_PARALLELS,
+ VIRTUALIZATION_BHYVE,
+ VIRTUALIZATION_QNX,
+ VIRTUALIZATION_VM_OTHER, /* a hypervisor was detected, but not identified */
+ VIRTUALIZATION_VM_LAST = VIRTUALIZATION_VM_OTHER,
+ /* Containers: everything from CONTAINER_FIRST through CONTAINER_LAST satisfies VIRTUALIZATION_IS_CONTAINER() */
+ VIRTUALIZATION_CONTAINER_FIRST,
+ VIRTUALIZATION_SYSTEMD_NSPAWN = VIRTUALIZATION_CONTAINER_FIRST,
+ VIRTUALIZATION_LXC_LIBVIRT,
+ VIRTUALIZATION_LXC,
+ VIRTUALIZATION_OPENVZ,
+ VIRTUALIZATION_DOCKER,
+ VIRTUALIZATION_RKT,
+ VIRTUALIZATION_CONTAINER_OTHER, /* a container was detected, but its manager not identified */
+ VIRTUALIZATION_CONTAINER_LAST = VIRTUALIZATION_CONTAINER_OTHER,
+ /* _VIRTUALIZATION_MAX is one past the last valid ID; negative values are used for "invalid" and for errno-style errors */
+ _VIRTUALIZATION_MAX,
+ _VIRTUALIZATION_INVALID = -1
+};
+
+static inline bool VIRTUALIZATION_IS_VM(int x) {
+        /* True if x lies within the contiguous range of VM IDs of the enum above. */
+        return !(x < VIRTUALIZATION_VM_FIRST || x > VIRTUALIZATION_VM_LAST);
+}
+
+static inline bool VIRTUALIZATION_IS_CONTAINER(int x) {
+        /* True if x lies within the contiguous range of container IDs of the enum above. */
+        return !(x < VIRTUALIZATION_CONTAINER_FIRST || x > VIRTUALIZATION_CONTAINER_LAST);
+}
+
+int detect_vm(void);
+int detect_container(void);
+int detect_virtualization(void);
+
+int running_in_userns(void);
+int running_in_chroot(void);
+
+const char *virtualization_to_string(int v) _const_;
+int virtualization_from_string(const char *s) _pure_;
diff --git a/src/basic/xattr-util.c b/src/basic/xattr-util.c
new file mode 100644
index 0000000..0ee0979
--- /dev/null
+++ b/src/basic/xattr-util.c
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/xattr.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "sparse-endian.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "xattr-util.h"
+
+int getxattr_malloc(const char *path, const char *name, char **value, bool allow_symlink) {
+        char *v;
+        size_t l;
+        ssize_t n;
+
+        assert(path);
+        assert(name);
+        assert(value);
+
+        /* Reads the extended attribute 'name' of 'path' into a newly allocated buffer. If allow_symlink is
+         * true lgetxattr() is used, otherwise getxattr(). On success the buffer is stored in *value (the
+         * caller has to free it) and the length of the attribute value is returned. The buffer is always at
+         * least one byte larger than the value. On failure a negative errno is returned and *value is left
+         * untouched.
+         *
+         * We start with a 100 byte buffer. Whenever it turns out to be too small the current size of the
+         * attribute is queried and we retry with that size plus one, until the value fits. */
+
+        for (l = 100; ; l = (size_t) n + 1) {
+                int saved_errno;
+
+                v = new0(char, l);
+                if (!v)
+                        return -ENOMEM;
+
+                if (allow_symlink)
+                        n = lgetxattr(path, name, v, l);
+                else
+                        n = getxattr(path, name, v, l);
+
+                if (n >= 0 && (size_t) n < l) {
+                        *value = v;
+                        return n;
+                }
+
+                /* free() is not guaranteed to leave errno alone on all libc versions, hence pick up the
+                 * error of the xattr call before releasing the buffer. */
+                saved_errno = errno;
+                free(v);
+
+                if (n < 0 && saved_errno != ERANGE)
+                        return -saved_errno;
+
+                /* Buffer too small (or filled to the last byte): ask for the size needed */
+                if (allow_symlink)
+                        n = lgetxattr(path, name, NULL, 0);
+                else
+                        n = getxattr(path, name, NULL, 0);
+                if (n < 0)
+                        return -errno;
+        }
+}
+
+int fgetxattr_malloc(int fd, const char *name, char **value) {
+        char *v;
+        size_t l;
+        ssize_t n;
+
+        assert(fd >= 0);
+        assert(name);
+        assert(value);
+
+        /* Reads the extended attribute 'name' of the file referred to by fd into a newly allocated buffer.
+         * On success the buffer is stored in *value (the caller has to free it) and the length of the
+         * attribute value is returned. The buffer is always at least one byte larger than the value. On
+         * failure a negative errno is returned and *value is left untouched.
+         *
+         * We start with a 100 byte buffer. Whenever it turns out to be too small the current size of the
+         * attribute is queried and we retry with that size plus one, until the value fits. */
+
+        for (l = 100; ; l = (size_t) n + 1) {
+                int saved_errno;
+
+                v = new0(char, l);
+                if (!v)
+                        return -ENOMEM;
+
+                n = fgetxattr(fd, name, v, l);
+
+                if (n >= 0 && (size_t) n < l) {
+                        *value = v;
+                        return n;
+                }
+
+                /* free() is not guaranteed to leave errno alone on all libc versions, hence pick up the
+                 * error of the xattr call before releasing the buffer. */
+                saved_errno = errno;
+                free(v);
+
+                if (n < 0 && saved_errno != ERANGE)
+                        return -saved_errno;
+
+                /* Buffer too small (or filled to the last byte): ask for the size needed */
+                n = fgetxattr(fd, name, NULL, 0);
+                if (n < 0)
+                        return -errno;
+        }
+}
+
+int fgetxattrat_fake(
+ int dirfd,
+ const char *filename,
+ const char *attribute,
+ void *value, size_t size,
+ int flags,
+ size_t *ret_size) {
+ /* Reads the extended attribute 'attribute' of 'filename' (taken relative to dirfd) into the caller's
+ * buffer 'value' of 'size' bytes. Only AT_SYMLINK_NOFOLLOW and AT_EMPTY_PATH are accepted in flags.
+ * With an empty or NULL filename AT_EMPTY_PATH is required and dirfd itself is queried. On success
+ * the number of bytes getxattr() reported is stored in *ret_size and 0 is returned, otherwise a
+ * negative errno (-EINVAL for unsupported flag combinations). */
+ char fn[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ _cleanup_close_ int fd = -1;
+ ssize_t l;
+
+ /* The kernel doesn't have a fgetxattrat() command, hence let's emulate one */
+
+ if (flags & ~(AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH))
+ return -EINVAL;
+
+ if (isempty(filename)) {
+ if (!(flags & AT_EMPTY_PATH))
+ return -EINVAL;
+
+ xsprintf(fn, "/proc/self/fd/%i", dirfd);
+ } else {
+ fd = openat(dirfd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_NOFOLLOW ? O_NOFOLLOW : 0)); /* O_PATH: we only need a reference to the inode, not read access */
+ if (fd < 0)
+ return -errno;
+
+ xsprintf(fn, "/proc/self/fd/%i", fd);
+ }
+
+ l = getxattr(fn, attribute, value, size); /* goes through the /proc/self/fd/ symlink to the file behind the descriptor */
+ if (l < 0)
+ return -errno;
+
+ *ret_size = l;
+ return 0;
+}
+
+static int parse_crtime(le64_t le, usec_t *usec) {
+        uint64_t t;
+
+        assert(usec);
+
+        /* Converts the little-endian on-disk value to host byte order. 0 and UINT64_MAX are not
+         * accepted as timestamps and refused with -EIO. */
+        t = le64toh(le);
+        if (t == 0 || t == (uint64_t) -1)
+                return -EIO;
+
+        *usec = (usec_t) t;
+        return 0;
+}
+
+int fd_getcrtime_at(int dirfd, const char *name, usec_t *ret, int flags) {
+ struct_statx sx;
+ usec_t a, b; /* a: birth time according to statx(), b: according to the xattr */
+ le64_t le;
+ size_t n;
+ int r;
+ /* Determines the creation time of the file 'name' relative to dirfd. Only AT_EMPTY_PATH and
+ * AT_SYMLINK_NOFOLLOW are accepted in flags, anything else yields -EINVAL. On success the timestamp
+ * is stored in *ret and 0 is returned. If neither statx() nor the xattr provides a usable value the
+ * error from the xattr lookup is returned. */
+ assert(ret);
+
+ if (flags & ~(AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW))
+ return -EINVAL;
+
+ /* So here's the deal: the creation/birth time (crtime/btime) of a file is a relatively newly supported concept
+ * on Linux (or more strictly speaking: a concept that only recently got supported in the API, it was
+ * implemented on various file systems on the lower level since a while, but never was accessible). However, we
+ * needed a concept like that for vacuuming algorithms and such, hence we emulated it via a user xattr for a
+ * long time. Starting with Linux 4.11 there's statx() which exposes the timestamp to userspace for the first
+ * time, where it is available. This function will read it, but it tries to keep some compatibility with older
+ * systems: we try to read both the crtime/btime and the xattr, and then use whatever is older. After all the
+ * concept is useful for determining how "old" a file really is, and hence using the older of the two makes
+ * most sense. */
+
+ if (statx(dirfd, strempty(name), flags|AT_STATX_DONT_SYNC, STATX_BTIME, &sx) >= 0 &&
+ (sx.stx_mask & STATX_BTIME) &&
+ sx.stx_btime.tv_sec != 0)
+ a = (usec_t) sx.stx_btime.tv_sec * USEC_PER_SEC +
+ (usec_t) sx.stx_btime.tv_nsec / NSEC_PER_USEC;
+ else
+ a = USEC_INFINITY; /* marks "no birth time available from statx()" */
+
+ r = fgetxattrat_fake(dirfd, name, "user.crtime_usec", &le, sizeof(le), flags, &n);
+ if (r >= 0) {
+ if (n != sizeof(le)) /* the attribute must be exactly one 64bit value */
+ r = -EIO;
+ else
+ r = parse_crtime(le, &b);
+ }
+ if (r < 0) {
+ if (a != USEC_INFINITY) { /* no usable xattr, but statx() delivered: use that */
+ *ret = a;
+ return 0;
+ }
+
+ return r;
+ }
+
+ if (a != USEC_INFINITY)
+ *ret = MIN(a, b);
+ else
+ *ret = b;
+
+ return 0;
+}
+
+int fd_getcrtime(int fd, usec_t *ret) {
+ return fd_getcrtime_at(fd, NULL, ret, AT_EMPTY_PATH); /* no name plus AT_EMPTY_PATH: query the file that fd itself refers to */
+}
+
+int path_getcrtime(const char *p, usec_t *ret) {
+ return fd_getcrtime_at(AT_FDCWD, p, ret, 0); /* p is taken relative to the current working directory; flags 0, i.e. AT_SYMLINK_NOFOLLOW is not set */
+}
+
+int fd_setcrtime(int fd, usec_t usec) {
+        le64_t le;
+
+        assert(fd >= 0);
+
+        /* Stores the creation time in the "user.crtime_usec" xattr of fd, as a little-endian 64bit
+         * value. If 0 or USEC_INFINITY is passed the current CLOCK_REALTIME time is stored instead.
+         * Returns 0 on success, a negative errno otherwise. */
+        if (usec == 0 || usec == USEC_INFINITY)
+                usec = now(CLOCK_REALTIME);
+
+        le = htole64((uint64_t) usec);
+
+        return fsetxattr(fd, "user.crtime_usec", &le, sizeof(le), 0) < 0 ? -errno : 0;
+}
diff --git a/src/basic/xattr-util.h b/src/basic/xattr-util.h
new file mode 100644
index 0000000..9fa85d7
--- /dev/null
+++ b/src/basic/xattr-util.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/types.h>
+
+#include "time-util.h"
+
+int getxattr_malloc(const char *path, const char *name, char **value, bool allow_symlink);
+int fgetxattr_malloc(int fd, const char *name, char **value);
+
+int fgetxattrat_fake(
+ int dirfd,
+ const char *filename,
+ const char *attribute,
+ void *value, size_t size,
+ int flags,
+ size_t *ret_size);
+
+int fd_setcrtime(int fd, usec_t usec);
+
+int fd_getcrtime(int fd, usec_t *usec);
+int path_getcrtime(const char *p, usec_t *usec);
+int fd_getcrtime_at(int dirfd, const char *name, usec_t *usec, int flags);